######################################################################################
# Script: Naturalisation and Immigrant Earnings: Why and to Whom Citizenship Matters #
# Floris Peters, Hans Schmeets & Maarten Vink                                        #
# European Journal of Population                                                     #
######################################################################################


################
# Introduction #
################

# This file provides the syntax used to create all the tables and figures in the paper. 
# The dataset contains sensitive, micro level information. As such, for privacy reasons the data is only available to individuals employed at or affiliated to Statistics Netherlands. 
# The dataset can be found at the following location on the network of Statistics Netherlands: \\cbsp.nl\Productie\Projecten\SAL\209253UM_FP_SEC1\Werk\Floris\PhD\EJP_income 

#######################################################################################################################
#######################################################################################################################

#############
# Variables #
#############

# ID
# (Individual identification number)
 
# YEAR
# (Observation year)

# INCOME
# (Income from labor)

# EMPLOYMENT
# (Employment status)
# [0] Not employed;
# [1] Employed

# HOURS
# (Weekly working hours)

# SECTOR
# (Labour market sector, categorical)
# [1] Agriculture, forestry and fishery;
# [2] Non-housing industry and energy;
# [3] Transportation and communication;
# [4] Information and communication;
# [5] Financial services;
# [6] Rent and management of property;
# [7] Business services;
# [8] Public sector and care sector;
# [9] Culture, recreation and other

# NATURALISED
# (Naturalised - time-varying)
# [0] Not naturalised; 
# [1] Naturalised

# NATURALISATION
# (Naturalisation during the observation period)
# [0] No naturalisation during the observation period; 
# [1] Naturaisation during the observation period

# YSN_CAT
# (Years since naturalisation, categorical)
# [1] No naturalization;
# [2] >3 years prior to naturalization;
# [3] 3 years prior to naturalization; 
# [4] 2 years prior to naturalization;
# [5] 1 year prior to naturalization;
# [6] Year of naturalization;
# [7] 1 year after naturalization;
# [8] 2 years after naturalization;
# [9] 3 years after naturalization;
# [10] >3 years after naturalization

# YSN
# (Years since naturalization)

# YSM_NATURALISATION
# (Years since migration * Naturalisation during observation period)

# YSN_NATURALISED
# (Years since naturalisation * Naturalised)

# IMMIGRATIONYEAR
# (Migration cohorts - Year of first immigration to the Netherlands)

# GENDER
# [1] Male; 
# [2] Female

# AGEARRIVAL
# (Age at the moment of migration)

# AGECAT 
# (Age at the moment of migration in categories) 
# [1] 20-24 years; 
# [2] 25-29 years; 
# [3] 30-34 years; 
# [4] 35-39 years; 
# [5] 40-44 years; 
# [6] 45-50 years

# RESIDENCE
# (Years since migration)

# RESIDENCECAT
# (Years since migration in categories)
# [1] 0-1 years; 
# [2] 2-3 years; 
# [3] 4-5 years; 
# [4] 6-7 years; 
# [5]: 8-9 years

# PARTNER
# [1] No partner; 
# [2] Foreign born foreign partner; 
# [3] Foreign born Dutch partner; 
# [4] Native Dutch partner

# CHILD
# (Children in the household in categories)
# [0] no children <18 in household; 
# [1] Children <18 in household

# DEVELOPMENT
# (Human Development Index (HDI) score origin country)

# DEVELOPMENTREC_Q
# (Human Development Index (HDI) score origin country * 1000 in quartiles)
# [1] Lowest quartile; 
# [2] Second quartile;
# [3] Third quartile;
# [4] Fourth quartile

# EU
# [0] Not EU country of origin; 
# [1] EU country of origin

# REGION
# (Region of origin, categorical)
# [1] EU;
# [2] non-EU Europe, North America and Australia;
# [3] South America;
# [4] Africa;
# [5] Asia;
# [6] Middle-East

# TRUNCATION
# (Does the individual drop out of the dataset prematurely?)
# [0] Individual remains in the dataset for the entire observation period; 
# [1] Individual does not remain in the dataset for the entire observation period

# DISTANCE
# (Geographical distance between the origin country and the Netherlands)

#######################################################################################################################
#######################################################################################################################


#Upload the dataset (Data_cit_inc_main.sav) to R and load the necessary libraries#
library(Matrix)
library(optimx)
library(plm)
dataset_main <- read.csv(file.choose(),header=T,sep=";")

###########     
# Table 1 #
###########

#select male immigrants with employment
dataset_male_empl <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1)

#regression table 1: male immigrants with employment
result_Table_1_model_1_male <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                   RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                   data = dataset_male_empl, 
                                   model = 'within', 
                                   effect = 'individual', 
                                   index = c("ID","YEAR"))

#select female immigrants with employment
dataset_female_empl <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1)

#regression table 1: female immigrants with employment
result_Table_1_model_1_female <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                     RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                     data = dataset_female_empl, 
                                     model = 'within', 
                                     effect = 'individual', 
                                     index = c("ID","YEAR"))

#select male immigrants
dataset_male <- subset(dataset_main, GENDER == 1)

#regression table 1: male immigrants
result_Table_1_model_2_male <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                   RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                   data = dataset_male, 
                                   model = 'within', 
                                   effect = 'individual', 
                                   index = c("ID","YEAR"))

#select female immigrants
dataset_female <- subset(dataset_main, GENDER == 2)

#regression table 1: female immigrants
result_Table_1_model_2_female <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                     RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                     data = dataset_female, 
                                     model = 'within', 
                                     effect = 'individual', 
                                     index = c("ID","YEAR"))


###########     
# Table 2 #
###########

#select male immigrants with employment
dataset_male_empl <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1)

#regression table 2: male immigrants with employment
result_Table_2_male <- plm(INCOME ~ as.factor(YSN_CAT) +
                           RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                           data = dataset_male_empl, 
                           model = 'within', 
                           effect = 'individual', 
                           index = c("ID","YEAR"))

#select female immigrants with employment
dataset_female_empl <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1)

#regression table 2: female immigrants with employment
result_Table_2_female <- plm(INCOME ~ as.factor(YSN_CAT) +
                             RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                             data = dataset_female_empl, 
                             model = 'within', 
                             effect = 'individual', 
                             index = c("ID","YEAR"))


###########     
# Table 3 #
###########

#select male immigrants with employment from specific regions of origin
dataset_male_empl_EU <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & REGION == 1)
dataset_male_empl_west <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & REGION == 2)
dataset_male_empl_SA <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & REGION == 3)
dataset_male_empl_africa <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & REGION == 4)
dataset_male_empl_asia <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & REGION == 5)
dataset_male_empl_ME <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & REGION == 6)

#regression table 3: male immigrants with employment from the EU
result_Table_3_male_EU <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_male_empl_EU, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))

#regression table 3: male immigrants with employment from non-EU Western countries
result_Table_3_male_west <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                data = dataset_male_empl_west, 
                                model = 'within', 
                                effect = 'individual', 
                                index = c("ID","YEAR"))

#regression table 3: male immigrants with employment from South America
result_Table_3_male_SA <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_male_empl_SA, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))

#regression table 3: male immigrants with employment from Africa
result_Table_3_male_africa <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_male_empl_africa, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))

#regression table 3: male immigrants with employment from Asia
result_Table_3_male_asia <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_male_empl_asia, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))

#regression table 3: male immigrants with employment from the Middle East
result_Table_3_male_ME <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_male_empl_ME, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))


###########     
# Table 4 #
###########

#select female immigrants with employment from specific regions of origin
dataset_female_empl_EU <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & REGION == 1)
dataset_female_empl_west <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & REGION == 2)
dataset_female_empl_SA <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & REGION == 3)
dataset_female_empl_africa <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & REGION == 4)
dataset_female_empl_asia <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & REGION == 5)
dataset_female_empl_ME <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & REGION == 6)

#regression table 4: female immigrants with employment from the EU
result_Table_4_female_EU <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                data = dataset_female_empl_EU, 
                                model = 'within', 
                                effect = 'individual', 
                                index = c("ID","YEAR"))

#regression table 4: female immigrants with employment from non-EU Western countries
result_Table_4_female_west <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                  RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                  data = dataset_female_empl_west, 
                                  model = 'within', 
                                  effect = 'individual', 
                                  index = c("ID","YEAR"))

#regression table 4: female immigrants with employment from South America
result_Table_4_female_SA <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                data = dataset_female_empl_SA, 
                                model = 'within', 
                                effect = 'individual', 
                                index = c("ID","YEAR"))

#regression table 4: female immigrants with employment from Africa
result_Table_4_female_africa <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                    RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                    data = dataset_female_empl_africa, 
                                    model = 'within', 
                                    effect = 'individual', 
                                    index = c("ID","YEAR"))

#regression table 4: female immigrants with employment from Asia
result_Table_4_female_asia <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                  RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                  data = dataset_female_empl_asia, 
                                  model = 'within', 
                                  effect = 'individual', 
                                  index = c("ID","YEAR"))

#regression table 4: female immigrants with employment from the Middle East
result_Table_4_female_ME <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                data = dataset_female_empl_ME, 
                                model = 'within', 
                                effect = 'individual', 
                                index = c("ID","YEAR"))


###########     
# Table 5 #
###########

#select male immigrants with employment whose working hours information is available
dataset_male_empl_hours <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & HOURS > 0)

#regression table 5: male immigrants with employment
result_Table_5_male_model_1 <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                   RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                   data = dataset_male_empl_hours, 
                                   model = 'within', 
                                   effect = 'individual', 
                                   index = c("ID","YEAR"))

#regression table 5: male immigrants with employment including working hours
result_Table_5_male_model_2 <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                   HOURS + RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                   data = dataset_male_empl_hours, 
                                   model = 'within', 
                                   effect = 'individual', 
                                   index = c("ID","YEAR"))

#select female immigrants with employment whose working hours information is available
dataset_female_empl_hours <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & HOURS > 0)

#regression table 5: female immigrants with employment
result_Table_5_female_model_1 <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                     RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                     data = dataset_female_empl_hours, 
                                     model = 'within', 
                                     effect = 'individual', 
                                     index = c("ID","YEAR"))

#regression table 5: female immigrants with employment including working hours
result_Table_5_female_model_2 <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                     HOURS + RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                     data = dataset_female_empl_hours, 
                                     model = 'within', 
                                     effect = 'individual', 
                                     index = c("ID","YEAR"))


###########     
# Table 6 #
###########

#select male immigrants with employment whose labour market sector information is available
dataset_male_empl_sector <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & SECTOR > 0)

#regression table 6: male immigrants with employment
result_Table_6_male_model_1 <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                   RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                   data = dataset_male_empl_sector, 
                                   model = 'within', 
                                   effect = 'individual', 
                                   index = c("ID","YEAR"))

#regression table 6: male immigrants with employment including labor market sector
result_Table_6_male_model_2 <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                   SECTOR + RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                   data = dataset_male_empl_sector, 
                                   model = 'within', 
                                   effect = 'individual', 
                                   index = c("ID","YEAR"))


###########     
# Table 7 #
###########

#select female immigrants with employment whose whose labour market sector information is available
dataset_female_empl_sector <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & SECTOR > 0)

#regression table 7: female immigrants with employment
result_Table_7_female_model_1 <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                     RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                     data = dataset_female_empl_sector, 
                                     model = 'within', 
                                     effect = 'individual', 
                                     index = c("ID","YEAR"))

#regression table 7: female immigrants with employment including labor market sector
result_Table_7_female_model_2 <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                     SECTOR + RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                     data = dataset_female_empl_sector, 
                                     model = 'within', 
                                     effect = 'individual', 
                                     index = c("ID","YEAR"))


###########     
# Table 8 #
###########

#select male immigrants with employment
dataset_male_empl <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1)

#create years since naturalization categories
dataset_male_empl_ysn_cat1 <- subset(dataset_male_empl, YSN_CAT == 1)
dataset_male_empl_ysn_cat2 <- subset(dataset_male_empl, YSN_CAT == 2)
dataset_male_empl_ysn_cat3 <- subset(dataset_male_empl, YSN_CAT == 3)
dataset_male_empl_ysn_cat4 <- subset(dataset_male_empl, YSN_CAT == 4)
dataset_male_empl_ysn_cat5 <- subset(dataset_male_empl, YSN_CAT == 5)
dataset_male_empl_ysn_cat6 <- subset(dataset_male_empl, YSN_CAT == 6)
dataset_male_empl_ysn_cat7 <- subset(dataset_male_empl, YSN_CAT == 7)
dataset_male_empl_ysn_cat8 <- subset(dataset_male_empl, YSN_CAT == 8)
dataset_male_empl_ysn_cat9 <- subset(dataset_male_empl, YSN_CAT == 9)
dataset_male_empl_ysn_cat10 <- subset(dataset_male_empl, YSN_CAT == 10)

#calculate mean, lower and upper 95% confidence intervals
#YSN_CAT == 1
mean_YSN_CAT_1 <- mean(dataset_male_empl_ysn_cat1$INCOME)
sample.n_YSN_CAT_1 <- length(dataset_male_empl_ysn_cat1$INCOME)
sample.sd_YSN_CAT_1 <- sd(dataset_male_empl_ysn_cat1$INCOME)
sample.se_YSN_CAT_1 <- sample.sd_YSN_CAT_1/sqrt(sample.n_YSN_CAT_1)
alpha = 0.05
degrees.freedom_YSN_CAT_1 = sample.n_YSN_CAT_1 - 1
t.score_YSN_CAT_1 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_1,lower.tail=F)
margin.error_YSN_CAT_1 <- t.score_YSN_CAT_1 * sample.se_YSN_CAT_1
lower.bound_YSN_CAT_1 <- mean_YSN_CAT_1 - margin.error_YSN_CAT_1
upper.bound_YSN_CAT_1 <- mean_YSN_CAT_1 + margin.error_YSN_CAT_1

#YSN_CAT == 2
mean_YSN_CAT_2 <- mean(dataset_male_empl_ysn_cat2$INCOME)
sample.n_YSN_CAT_2 <- length(dataset_male_empl_ysn_cat2$INCOME)
sample.sd_YSN_CAT_2 <- sd(dataset_male_empl_ysn_cat2$INCOME)
sample.se_YSN_CAT_2 <- sample.sd_YSN_CAT_2/sqrt(sample.n_YSN_CAT_2)
alpha = 0.05
degrees.freedom_YSN_CAT_2 = sample.n_YSN_CAT_2 - 1
t.score_YSN_CAT_2 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_2,lower.tail=F)
margin.error_YSN_CAT_2 <- t.score_YSN_CAT_2 * sample.se_YSN_CAT_2
lower.bound_YSN_CAT_2 <- mean_YSN_CAT_2 - margin.error_YSN_CAT_2
upper.bound_YSN_CAT_2 <- mean_YSN_CAT_2 + margin.error_YSN_CAT_2

#YSN_CAT == 3
mean_YSN_CAT_3 <- mean(dataset_male_empl_ysn_cat3$INCOME)
sample.n_YSN_CAT_3 <- length(dataset_male_empl_ysn_cat3$INCOME)
sample.sd_YSN_CAT_3 <- sd(dataset_male_empl_ysn_cat3$INCOME)
sample.se_YSN_CAT_3 <- sample.sd_YSN_CAT_3/sqrt(sample.n_YSN_CAT_3)
alpha = 0.05
degrees.freedom_YSN_CAT_3 = sample.n_YSN_CAT_3 - 1
t.score_YSN_CAT_3 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_3,lower.tail=F)
margin.error_YSN_CAT_3 <- t.score_YSN_CAT_3 * sample.se_YSN_CAT_3
lower.bound_YSN_CAT_3 <- mean_YSN_CAT_3 - margin.error_YSN_CAT_3
upper.bound_YSN_CAT_3 <- mean_YSN_CAT_3 + margin.error_YSN_CAT_3

#YSN_CAT == 4
mean_YSN_CAT_4 <- mean(dataset_male_empl_ysn_cat4$INCOME)
sample.n_YSN_CAT_4 <- length(dataset_male_empl_ysn_cat4$INCOME)
sample.sd_YSN_CAT_4 <- sd(dataset_male_empl_ysn_cat4$INCOME)
sample.se_YSN_CAT_4 <- sample.sd_YSN_CAT_4/sqrt(sample.n_YSN_CAT_4)
alpha = 0.05
degrees.freedom_YSN_CAT_4 = sample.n_YSN_CAT_4 - 1
t.score_YSN_CAT_4 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_4,lower.tail=F)
margin.error_YSN_CAT_4 <- t.score_YSN_CAT_4 * sample.se_YSN_CAT_4
lower.bound_YSN_CAT_4 <- mean_YSN_CAT_4 - margin.error_YSN_CAT_4
upper.bound_YSN_CAT_4 <- mean_YSN_CAT_4 + margin.error_YSN_CAT_4

#YSN_CAT == 5
mean_YSN_CAT_5 <- mean(dataset_male_empl_ysn_cat5$INCOME)
sample.n_YSN_CAT_5 <- length(dataset_male_empl_ysn_cat5$INCOME)
sample.sd_YSN_CAT_5 <- sd(dataset_male_empl_ysn_cat5$INCOME)
sample.se_YSN_CAT_5 <- sample.sd_YSN_CAT_5/sqrt(sample.n_YSN_CAT_5)
alpha = 0.05
degrees.freedom_YSN_CAT_5 = sample.n_YSN_CAT_5 - 1
t.score_YSN_CAT_5 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_5,lower.tail=F)
margin.error_YSN_CAT_5 <- t.score_YSN_CAT_5 * sample.se_YSN_CAT_5
lower.bound_YSN_CAT_5 <- mean_YSN_CAT_5 - margin.error_YSN_CAT_5
upper.bound_YSN_CAT_5 <- mean_YSN_CAT_5 + margin.error_YSN_CAT_5

#YSN_CAT == 6
mean_YSN_CAT_6 <- mean(dataset_male_empl_ysn_cat6$INCOME)
sample.n_YSN_CAT_6 <- length(dataset_male_empl_ysn_cat6$INCOME)
sample.sd_YSN_CAT_6 <- sd(dataset_male_empl_ysn_cat6$INCOME)
sample.se_YSN_CAT_6 <- sample.sd_YSN_CAT_6/sqrt(sample.n_YSN_CAT_6)
alpha = 0.05
degrees.freedom_YSN_CAT_6 = sample.n_YSN_CAT_6 - 1
t.score_YSN_CAT_6 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_6,lower.tail=F)
margin.error_YSN_CAT_6 <- t.score_YSN_CAT_6 * sample.se_YSN_CAT_6
lower.bound_YSN_CAT_6 <- mean_YSN_CAT_6 - margin.error_YSN_CAT_6
upper.bound_YSN_CAT_6 <- mean_YSN_CAT_6 + margin.error_YSN_CAT_6

#YSN_CAT == 7
mean_YSN_CAT_7 <- mean(dataset_male_empl_ysn_cat7$INCOME)
sample.n_YSN_CAT_7 <- length(dataset_male_empl_ysn_cat7$INCOME)
sample.sd_YSN_CAT_7 <- sd(dataset_male_empl_ysn_cat7$INCOME)
sample.se_YSN_CAT_7 <- sample.sd_YSN_CAT_7/sqrt(sample.n_YSN_CAT_7)
alpha = 0.05
degrees.freedom_YSN_CAT_7 = sample.n_YSN_CAT_7 - 1
t.score_YSN_CAT_7 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_7,lower.tail=F)
margin.error_YSN_CAT_7 <- t.score_YSN_CAT_7 * sample.se_YSN_CAT_7
lower.bound_YSN_CAT_7 <- mean_YSN_CAT_7 - margin.error_YSN_CAT_7
upper.bound_YSN_CAT_7 <- mean_YSN_CAT_7 + margin.error_YSN_CAT_7

#YSN_CAT == 8
mean_YSN_CAT_8 <- mean(dataset_male_empl_ysn_cat8$INCOME)
sample.n_YSN_CAT_8 <- length(dataset_male_empl_ysn_cat8$INCOME)
sample.sd_YSN_CAT_8 <- sd(dataset_male_empl_ysn_cat8$INCOME)
sample.se_YSN_CAT_8 <- sample.sd_YSN_CAT_8/sqrt(sample.n_YSN_CAT_8)
alpha = 0.05
degrees.freedom_YSN_CAT_8 = sample.n_YSN_CAT_8 - 1
t.score_YSN_CAT_8 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_8,lower.tail=F)
margin.error_YSN_CAT_8 <- t.score_YSN_CAT_8 * sample.se_YSN_CAT_8
lower.bound_YSN_CAT_8 <- mean_YSN_CAT_8 - margin.error_YSN_CAT_8
upper.bound_YSN_CAT_8 <- mean_YSN_CAT_8 + margin.error_YSN_CAT_8

#YSN_CAT == 9
mean_YSN_CAT_9 <- mean(dataset_male_empl_ysn_cat9$INCOME)
sample.n_YSN_CAT_9 <- length(dataset_male_empl_ysn_cat9$INCOME)
sample.sd_YSN_CAT_9 <- sd(dataset_male_empl_ysn_cat9$INCOME)
sample.se_YSN_CAT_9 <- sample.sd_YSN_CAT_9/sqrt(sample.n_YSN_CAT_9)
alpha = 0.05
degrees.freedom_YSN_CAT_9 = sample.n_YSN_CAT_9 - 1
t.score_YSN_CAT_9 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_9,lower.tail=F)
margin.error_YSN_CAT_9 <- t.score_YSN_CAT_9 * sample.se_YSN_CAT_9
lower.bound_YSN_CAT_9 <- mean_YSN_CAT_9 - margin.error_YSN_CAT_9
upper.bound_YSN_CAT_9 <- mean_YSN_CAT_9 + margin.error_YSN_CAT_9

#YSN_CAT == 10
mean_YSN_CAT_10 <- mean(dataset_male_empl_ysn_cat10$INCOME)
sample.n_YSN_CAT_10 <- length(dataset_male_empl_ysn_cat10$INCOME)
sample.sd_YSN_CAT_10 <- sd(dataset_male_empl_ysn_cat10$INCOME)
sample.se_YSN_CAT_10 <- sample.sd_YSN_CAT_10/sqrt(sample.n_YSN_CAT_10)
alpha = 0.05
degrees.freedom_YSN_CAT_10 = sample.n_YSN_CAT_10 - 1
t.score_YSN_CAT_10 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_10,lower.tail=F)
margin.error_YSN_CAT_10 <- t.score_YSN_CAT_10 * sample.se_YSN_CAT_10
lower.bound_YSN_CAT_10 <- mean_YSN_CAT_10 - margin.error_YSN_CAT_10
upper.bound_YSN_CAT_10 <- mean_YSN_CAT_10 + margin.error_YSN_CAT_10

#create age migration categories
dataset_male_empl_age_cat1 <- subset(dataset_male_empl, AGE_CAT == 1)
dataset_male_empl_age_cat2 <- subset(dataset_male_empl, AGE_CAT == 2)
dataset_male_empl_age_cat3 <- subset(dataset_male_empl, AGE_CAT == 3)
dataset_male_empl_age_cat4 <- subset(dataset_male_empl, AGE_CAT == 4)
dataset_male_empl_age_cat5 <- subset(dataset_male_empl, AGE_CAT == 5)
dataset_male_empl_age_cat6 <- subset(dataset_male_empl, AGE_CAT == 6)

#calculate mean, lower and upper 95% confidence intervals
#AGE_CAT == 1
mean_AGE_CAT_1 <- mean(dataset_male_empl_age_cat1$INCOME)
sample.n_AGE_CAT_1 <- length(dataset_male_empl_age_cat1$INCOME)
sample.sd_AGE_CAT_1 <- sd(dataset_male_empl_age_cat1$INCOME)
sample.se_AGE_CAT_1 <- sample.sd_AGE_CAT_1/sqrt(sample.n_AGE_CAT_1)
alpha = 0.05
degrees.freedom_AGE_CAT_1 = sample.n_AGE_CAT_1 - 1
t.score_AGE_CAT_1 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_1,lower.tail=F)
margin.error_AGE_CAT_1 <- t.score_AGE_CAT_1 * sample.se_AGE_CAT_1
lower.bound_AGE_CAT_1 <- mean_AGE_CAT_1 - margin.error_AGE_CAT_1
upper.bound_AGE_CAT_1 <- mean_AGE_CAT_1 + margin.error_AGE_CAT_1

#AGE_CAT == 2
mean_AGE_CAT_2 <- mean(dataset_male_empl_age_cat2$INCOME)
sample.n_AGE_CAT_2 <- length(dataset_male_empl_age_cat2$INCOME)
sample.sd_AGE_CAT_2 <- sd(dataset_male_empl_age_cat2$INCOME)
sample.se_AGE_CAT_2 <- sample.sd_AGE_CAT_2/sqrt(sample.n_AGE_CAT_2)
alpha = 0.05
degrees.freedom_AGE_CAT_2 = sample.n_AGE_CAT_2 - 1
t.score_AGE_CAT_2 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_2,lower.tail=F)
margin.error_AGE_CAT_2 <- t.score_AGE_CAT_2 * sample.se_AGE_CAT_2
lower.bound_AGE_CAT_2 <- mean_AGE_CAT_2 - margin.error_AGE_CAT_2
upper.bound_AGE_CAT_2 <- mean_AGE_CAT_2 + margin.error_AGE_CAT_2

#AGE_CAT == 3
mean_AGE_CAT_3 <- mean(dataset_male_empl_age_cat3$INCOME)
sample.n_AGE_CAT_3 <- length(dataset_male_empl_age_cat3$INCOME)
sample.sd_AGE_CAT_3 <- sd(dataset_male_empl_age_cat3$INCOME)
sample.se_AGE_CAT_3 <- sample.sd_AGE_CAT_3/sqrt(sample.n_AGE_CAT_3)
alpha = 0.05
degrees.freedom_AGE_CAT_3 = sample.n_AGE_CAT_3 - 1
t.score_AGE_CAT_3 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_3,lower.tail=F)
margin.error_AGE_CAT_3 <- t.score_AGE_CAT_3 * sample.se_AGE_CAT_3
lower.bound_AGE_CAT_3 <- mean_AGE_CAT_3 - margin.error_AGE_CAT_3
upper.bound_AGE_CAT_3 <- mean_AGE_CAT_3 + margin.error_AGE_CAT_3

#AGE_CAT == 4
mean_AGE_CAT_4 <- mean(dataset_male_empl_age_cat4$INCOME)
sample.n_AGE_CAT_4 <- length(dataset_male_empl_age_cat4$INCOME)
sample.sd_AGE_CAT_4 <- sd(dataset_male_empl_age_cat4$INCOME)
sample.se_AGE_CAT_4 <- sample.sd_AGE_CAT_4/sqrt(sample.n_AGE_CAT_4)
alpha = 0.05
degrees.freedom_AGE_CAT_4 = sample.n_AGE_CAT_4 - 1
t.score_AGE_CAT_4 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_4,lower.tail=F)
margin.error_AGE_CAT_4 <- t.score_AGE_CAT_4 * sample.se_AGE_CAT_4
lower.bound_AGE_CAT_4 <- mean_AGE_CAT_4 - margin.error_AGE_CAT_4
upper.bound_AGE_CAT_4 <- mean_AGE_CAT_4 + margin.error_AGE_CAT_4

#AGE_CAT == 5
mean_AGE_CAT_5 <- mean(dataset_male_empl_age_cat5$INCOME)
sample.n_AGE_CAT_5 <- length(dataset_male_empl_age_cat5$INCOME)
sample.sd_AGE_CAT_5 <- sd(dataset_male_empl_age_cat5$INCOME)
sample.se_AGE_CAT_5 <- sample.sd_AGE_CAT_5/sqrt(sample.n_AGE_CAT_5)
alpha = 0.05
degrees.freedom_AGE_CAT_5 = sample.n_AGE_CAT_5 - 1
t.score_AGE_CAT_5 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_5,lower.tail=F)
margin.error_AGE_CAT_5 <- t.score_AGE_CAT_5 * sample.se_AGE_CAT_5
lower.bound_AGE_CAT_5 <- mean_AGE_CAT_5 - margin.error_AGE_CAT_5
upper.bound_AGE_CAT_5 <- mean_AGE_CAT_5 + margin.error_AGE_CAT_5

#AGE_CAT == 6
mean_AGE_CAT_6 <- mean(dataset_male_empl_age_cat6$INCOME)
sample.n_AGE_CAT_6 <- length(dataset_male_empl_age_cat6$INCOME)
sample.sd_AGE_CAT_6 <- sd(dataset_male_empl_age_cat6$INCOME)
sample.se_AGE_CAT_6 <- sample.sd_AGE_CAT_6/sqrt(sample.n_AGE_CAT_6)
alpha = 0.05
degrees.freedom_AGE_CAT_6 = sample.n_AGE_CAT_6 - 1
t.score_AGE_CAT_6 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_6,lower.tail=F)
margin.error_AGE_CAT_6 <- t.score_AGE_CAT_6 * sample.se_AGE_CAT_6
lower.bound_AGE_CAT_6 <- mean_AGE_CAT_6 - margin.error_AGE_CAT_6
upper.bound_AGE_CAT_6 <- mean_AGE_CAT_6 + margin.error_AGE_CAT_6

#create years since migration categories
dataset_male_empl_ysm_cat1 <- subset(dataset_male_empl, YSM_CAT == 1)
dataset_male_empl_ysm_cat2 <- subset(dataset_male_empl, YSM_CAT == 2)
dataset_male_empl_ysm_cat3 <- subset(dataset_male_empl, YSM_CAT == 3)
dataset_male_empl_ysm_cat4 <- subset(dataset_male_empl, YSM_CAT == 4)
dataset_male_empl_ysm_cat4 <- subset(dataset_male_empl, YSM_CAT == 5)

#calculate mean, lower and upper 95% confidence intervals
#YSM_CAT == 1
mean_YSM_CAT_1 <- mean(dataset_male_empl_ysm_cat1$INCOME)
sample.n_YSM_CAT_1 <- length(dataset_male_empl_ysm_cat1$INCOME)
sample.sd_YSM_CAT_1 <- sd(dataset_male_empl_ysm_cat1$INCOME)
sample.se_YSM_CAT_1 <- sample.sd_YSM_CAT_1/sqrt(sample.n_YSM_CAT_1)
alpha = 0.05
degrees.freedom_YSM_CAT_1 = sample.n_YSM_CAT_1 - 1
t.score_YSM_CAT_1 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_1,lower.tail=F)
margin.error_YSM_CAT_1 <- t.score_YSM_CAT_1 * sample.se_YSM_CAT_1
lower.bound_YSM_CAT_1 <- mean_YSM_CAT_1 - margin.error_YSM_CAT_1
upper.bound_YSM_CAT_1 <- mean_YSM_CAT_1 + margin.error_YSM_CAT_1

#YSM_CAT == 2
mean_YSM_CAT_2 <- mean(dataset_male_empl_ysm_cat2$INCOME)
sample.n_YSM_CAT_2 <- length(dataset_male_empl_ysm_cat2$INCOME)
sample.sd_YSM_CAT_2 <- sd(dataset_male_empl_ysm_cat2$INCOME)
sample.se_YSM_CAT_2 <- sample.sd_YSM_CAT_2/sqrt(sample.n_YSM_CAT_2)
alpha = 0.05
degrees.freedom_YSM_CAT_2 = sample.n_YSM_CAT_2 - 1
t.score_YSM_CAT_2 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_2,lower.tail=F)
margin.error_YSM_CAT_2 <- t.score_YSM_CAT_2 * sample.se_YSM_CAT_2
lower.bound_YSM_CAT_2 <- mean_YSM_CAT_2 - margin.error_YSM_CAT_2
upper.bound_YSM_CAT_2 <- mean_YSM_CAT_2 + margin.error_YSM_CAT_2

#YSM_CAT == 3
mean_YSM_CAT_3 <- mean(dataset_male_empl_ysm_cat3$INCOME)
sample.n_YSM_CAT_3 <- length(dataset_male_empl_ysm_cat3$INCOME)
sample.sd_YSM_CAT_3 <- sd(dataset_male_empl_ysm_cat3$INCOME)
sample.se_YSM_CAT_3 <- sample.sd_YSM_CAT_3/sqrt(sample.n_YSM_CAT_3)
alpha = 0.05
degrees.freedom_YSM_CAT_3 = sample.n_YSM_CAT_3 - 1
t.score_YSM_CAT_3 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_3,lower.tail=F)
margin.error_YSM_CAT_3 <- t.score_YSM_CAT_3 * sample.se_YSM_CAT_3
lower.bound_YSM_CAT_3 <- mean_YSM_CAT_3 - margin.error_YSM_CAT_3
upper.bound_YSM_CAT_3 <- mean_YSM_CAT_3 + margin.error_YSM_CAT_3

#YSM_CAT == 4
mean_YSM_CAT_4 <- mean(dataset_male_empl_ysm_cat4$INCOME)
sample.n_YSM_CAT_4 <- length(dataset_male_empl_ysm_cat4$INCOME)
sample.sd_YSM_CAT_4 <- sd(dataset_male_empl_ysm_cat4$INCOME)
sample.se_YSM_CAT_4 <- sample.sd_YSM_CAT_4/sqrt(sample.n_YSM_CAT_4)
alpha = 0.05
degrees.freedom_YSM_CAT_4 = sample.n_YSM_CAT_4 - 1
t.score_YSM_CAT_4 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_4,lower.tail=F)
margin.error_YSM_CAT_4 <- t.score_YSM_CAT_4 * sample.se_YSM_CAT_4
lower.bound_YSM_CAT_4 <- mean_YSM_CAT_4 - margin.error_YSM_CAT_4
upper.bound_YSM_CAT_4 <- mean_YSM_CAT_4 + margin.error_YSM_CAT_4

#YSM_CAT == 5
mean_YSM_CAT_5 <- mean(dataset_male_empl_ysm_cat5$INCOME)
sample.n_YSM_CAT_5 <- length(dataset_male_empl_ysm_cat5$INCOME)
sample.sd_YSM_CAT_5 <- sd(dataset_male_empl_ysm_cat5$INCOME)
sample.se_YSM_CAT_5 <- sample.sd_YSM_CAT_5/sqrt(sample.n_YSM_CAT_5)
alpha = 0.05
degrees.freedom_YSM_CAT_5 = sample.n_YSM_CAT_5 - 1
t.score_YSM_CAT_5 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_5,lower.tail=F)
margin.error_YSM_CAT_5 <- t.score_YSM_CAT_5 * sample.se_YSM_CAT_5
lower.bound_YSM_CAT_5 <- mean_YSM_CAT_5 - margin.error_YSM_CAT_5
upper.bound_YSM_CAT_5 <- mean_YSM_CAT_5 + margin.error_YSM_CAT_5

#create partner categories
dataset_male_empl_partner1 <- subset(dataset_male_empl, PARTNER == 1)
dataset_male_empl_partner2 <- subset(dataset_male_empl, PARTNER == 2)
dataset_male_empl_partner3 <- subset(dataset_male_empl, PARTNER == 3)
dataset_male_empl_partner4 <- subset(dataset_male_empl, PARTNER == 4)

#calculate mean, lower and upper 95% confidence intervals
#PARTNER == 1
mean_PARTNER_1 <- mean(dataset_male_empl_partner1$INCOME)
sample.n_PARTNER_1 <- length(dataset_male_empl_partner1$INCOME)
sample.sd_PARTNER_1 <- sd(dataset_male_empl_partner1$INCOME)
sample.se_PARTNER_1 <- sample.sd_PARTNER_1/sqrt(sample.n_PARTNER_1)
alpha = 0.05
degrees.freedom_PARTNER_1 = sample.n_PARTNER_1 - 1
t.score_PARTNER_1 = qt(p=alpha/2, df=degrees.freedom_PARTNER_1,lower.tail=F)
margin.error_PARTNER_1 <- t.score_PARTNER_1 * sample.se_PARTNER_1
lower.bound_PARTNER_1 <- mean_PARTNER_1 - margin.error_PARTNER_1
upper.bound_PARTNER_1 <- mean_PARTNER_1 + margin.error_PARTNER_1

#PARTNER == 2
mean_PARTNER_2 <- mean(dataset_male_empl_partner2$INCOME)
sample.n_PARTNER_2 <- length(dataset_male_empl_partner2$INCOME)
sample.sd_PARTNER_2 <- sd(dataset_male_empl_partner2$INCOME)
sample.se_PARTNER_2 <- sample.sd_PARTNER_2/sqrt(sample.n_PARTNER_2)
alpha = 0.05
degrees.freedom_PARTNER_2 = sample.n_PARTNER_2 - 1
t.score_PARTNER_2 = qt(p=alpha/2, df=degrees.freedom_PARTNER_2,lower.tail=F)
margin.error_PARTNER_2 <- t.score_PARTNER_2 * sample.se_PARTNER_2
lower.bound_PARTNER_2 <- mean_PARTNER_2 - margin.error_PARTNER_2
upper.bound_PARTNER_2 <- mean_PARTNER_2 + margin.error_PARTNER_2

#PARTNER == 3
mean_PARTNER_3 <- mean(dataset_male_empl_partner3$INCOME)
sample.n_PARTNER_3 <- length(dataset_male_empl_partner3$INCOME)
sample.sd_PARTNER_3 <- sd(dataset_male_empl_partner3$INCOME)
sample.se_PARTNER_3 <- sample.sd_PARTNER_3/sqrt(sample.n_PARTNER_3)
alpha = 0.05
degrees.freedom_PARTNER_3 = sample.n_PARTNER_3 - 1
t.score_PARTNER_3 = qt(p=alpha/2, df=degrees.freedom_PARTNER_3,lower.tail=F)
margin.error_PARTNER_3 <- t.score_PARTNER_3 * sample.se_PARTNER_3
lower.bound_PARTNER_3 <- mean_PARTNER_3 - margin.error_PARTNER_3
upper.bound_PARTNER_3 <- mean_PARTNER_3 + margin.error_PARTNER_3

#PARTNER == 4
mean_PARTNER_4 <- mean(dataset_male_empl_partner4$INCOME)
sample.n_PARTNER_4 <- length(dataset_male_empl_partner4$INCOME)
sample.sd_PARTNER_4 <- sd(dataset_male_empl_partner4$INCOME)
sample.se_PARTNER_4 <- sample.sd_PARTNER_4/sqrt(sample.n_PARTNER_4)
alpha = 0.05
degrees.freedom_PARTNER_4 = sample.n_PARTNER_4 - 1
t.score_PARTNER_4 = qt(p=alpha/2, df=degrees.freedom_PARTNER_4,lower.tail=F)
margin.error_PARTNER_4 <- t.score_PARTNER_4 * sample.se_PARTNER_4
lower.bound_PARTNER_4 <- mean_PARTNER_4 - margin.error_PARTNER_4
upper.bound_PARTNER_4 <- mean_PARTNER_4 + margin.error_PARTNER_4

#create child categories
dataset_male_empl_child0 <- subset(dataset_male_empl, CHILD == 0)
dataset_male_empl_child1 <- subset(dataset_male_empl, CHILD == 1)

#calculate mean, lower and upper 95% confidence intervals
#CHILD == 0
mean_CHILD_0 <- mean(dataset_male_empl_child0$INCOME)
sample.n_CHILD_0 <- length(dataset_male_empl_child0$INCOME)
sample.sd_CHILD_0 <- sd(dataset_male_empl_child0$INCOME)
sample.se_CHILD_0 <- sample.sd_CHILD_0/sqrt(sample.n_CHILD_0)
alpha = 0.05
degrees.freedom_CHILD_0 = sample.n_CHILD_0 - 1
t.score_CHILD_0 = qt(p=alpha/2, df=degrees.freedom_CHILD_0,lower.tail=F)
margin.error_CHILD_0 <- t.score_CHILD_0 * sample.se_CHILD_0
lower.bound_CHILD_0 <- mean_CHILD_0 - margin.error_CHILD_0
upper.bound_CHILD_0 <- mean_CHILD_0 + margin.error_CHILD_0

#CHILD == 1
mean_CHILD_1 <- mean(dataset_male_empl_child1$INCOME)
sample.n_CHILD_1 <- length(dataset_male_empl_child1$INCOME)
sample.sd_CHILD_1 <- sd(dataset_male_empl_child1$INCOME)
sample.se_CHILD_1 <- sample.sd_CHILD_1/sqrt(sample.n_CHILD_1)
alpha = 0.05
degrees.freedom_CHILD_1 = sample.n_CHILD_1 - 1
t.score_CHILD_1 = qt(p=alpha/2, df=degrees.freedom_CHILD_1,lower.tail=F)
margin.error_CHILD_1 <- t.score_CHILD_1 * sample.se_CHILD_1
lower.bound_CHILD_1 <- mean_CHILD_1 - margin.error_CHILD_1
upper.bound_CHILD_1 <- mean_CHILD_1 + margin.error_CHILD_1

#create development country of origin categories
dataset_male_empl_developmentrec_Q_1 <- subset(dataset_male_empl, DEVELOPMENTREC_Q == 1)
dataset_male_empl_developmentrec_Q_2 <- subset(dataset_male_empl, DEVELOPMENTREC_Q == 2)
dataset_male_empl_developmentrec_Q_3 <- subset(dataset_male_empl, DEVELOPMENTREC_Q == 3)
dataset_male_empl_developmentrec_Q_4 <- subset(dataset_male_empl, DEVELOPMENTREC_Q == 4)

#calculate mean, lower and upper 95% confidence intervals
#DEVELOPMENTREC_Q == 1
mean_DEVELOPMENTREC_Q_1 <- mean(dataset_male_empl_developmentrec_Q_1$INCOME)
sample.n_DEVELOPMENTREC_Q_1 <- length(dataset_male_empl_developmentrec_Q_1$INCOME)
sample.sd_DEVELOPMENTREC_Q_1 <- sd(dataset_male_empl_developmentrec_Q_1$INCOME)
sample.se_DEVELOPMENTREC_Q_1 <- sample.sd_DEVELOPMENTREC_Q_1/sqrt(sample.n_DEVELOPMENTREC_Q_1)
alpha = 0.05
degrees.freedom_DEVELOPMENTREC_Q_1 = sample.n_DEVELOPMENTREC_Q_1 - 1
t.score_DEVELOPMENTREC_Q_1 = qt(p=alpha/2, df=degrees.freedom_DEVELOPMENTREC_Q_1,lower.tail=F)
margin.error_DEVELOPMENTREC_Q_1 <- t.score_DEVELOPMENTREC_Q_1 * sample.se_DEVELOPMENTREC_Q_1
lower.bound_DEVELOPMENTREC_Q_1 <- mean_DEVELOPMENTREC_Q_1 - margin.error_DEVELOPMENTREC_Q_1
upper.bound_DEVELOPMENTREC_Q_1 <- mean_DEVELOPMENTREC_Q_1 + margin.error_DEVELOPMENTREC_Q_1

#DEVELOPMENTREC_Q == 2
mean_DEVELOPMENTREC_Q_2 <- mean(dataset_male_empl_developmentrec_Q_2$INCOME)
sample.n_DEVELOPMENTREC_Q_2 <- length(dataset_male_empl_developmentrec_Q_2$INCOME)
sample.sd_DEVELOPMENTREC_Q_2 <- sd(dataset_male_empl_developmentrec_Q_2$INCOME)
sample.se_DEVELOPMENTREC_Q_2 <- sample.sd_DEVELOPMENTREC_Q_2/sqrt(sample.n_DEVELOPMENTREC_Q_2)
alpha = 0.05
degrees.freedom_DEVELOPMENTREC_Q_2 = sample.n_DEVELOPMENTREC_Q_2 - 1
t.score_DEVELOPMENTREC_Q_2 = qt(p=alpha/2, df=degrees.freedom_DEVELOPMENTREC_Q_2,lower.tail=F)
margin.error_DEVELOPMENTREC_Q_2 <- t.score_DEVELOPMENTREC_Q_2 * sample.se_DEVELOPMENTREC_Q_2
lower.bound_DEVELOPMENTREC_Q_2 <- mean_DEVELOPMENTREC_Q_2 - margin.error_DEVELOPMENTREC_Q_2
upper.bound_DEVELOPMENTREC_Q_2 <- mean_DEVELOPMENTREC_Q_2 + margin.error_DEVELOPMENTREC_Q_2

#DEVELOPMENTREC_Q == 3
mean_DEVELOPMENTREC_Q_3 <- mean(dataset_male_empl_developmentrec_Q_3$INCOME)
sample.n_DEVELOPMENTREC_Q_3 <- length(dataset_male_empl_developmentrec_Q_3$INCOME)
sample.sd_DEVELOPMENTREC_Q_3 <- sd(dataset_male_empl_developmentrec_Q_3$INCOME)
sample.se_DEVELOPMENTREC_Q_3 <- sample.sd_DEVELOPMENTREC_Q_3/sqrt(sample.n_DEVELOPMENTREC_Q_3)
alpha = 0.05
degrees.freedom_DEVELOPMENTREC_Q_3 = sample.n_DEVELOPMENTREC_Q_3 - 1
t.score_DEVELOPMENTREC_Q_3 = qt(p=alpha/2, df=degrees.freedom_DEVELOPMENTREC_Q_3,lower.tail=F)
margin.error_DEVELOPMENTREC_Q_3 <- t.score_DEVELOPMENTREC_Q_3 * sample.se_DEVELOPMENTREC_Q_3
lower.bound_DEVELOPMENTREC_Q_3 <- mean_DEVELOPMENTREC_Q_3 - margin.error_DEVELOPMENTREC_Q_3
upper.bound_DEVELOPMENTREC_Q_3 <- mean_DEVELOPMENTREC_Q_3 + margin.error_DEVELOPMENTREC_Q_3

#DEVELOPMENTREC_Q == 4
mean_DEVELOPMENTREC_Q_4 <- mean(dataset_male_empl_developmentrec_Q_4$INCOME)
sample.n_DEVELOPMENTREC_Q_4 <- length(dataset_male_empl_developmentrec_Q_4$INCOME)
sample.sd_DEVELOPMENTREC_Q_4 <- sd(dataset_male_empl_developmentrec_Q_4$INCOME)
sample.se_DEVELOPMENTREC_Q_4 <- sample.sd_DEVELOPMENTREC_Q_4/sqrt(sample.n_DEVELOPMENTREC_Q_4)
alpha = 0.05
degrees.freedom_DEVELOPMENTREC_Q_4 = sample.n_DEVELOPMENTREC_Q_4 - 1
t.score_DEVELOPMENTREC_Q_4 = qt(p=alpha/2, df=degrees.freedom_DEVELOPMENTREC_Q_4,lower.tail=F)
margin.error_DEVELOPMENTREC_Q_4 <- t.score_DEVELOPMENTREC_Q_4 * sample.se_DEVELOPMENTREC_Q_4
lower.bound_DEVELOPMENTREC_Q_4 <- mean_DEVELOPMENTREC_Q_4 - margin.error_DEVELOPMENTREC_Q_4
upper.bound_DEVELOPMENTREC_Q_4 <- mean_DEVELOPMENTREC_Q_4 + margin.error_DEVELOPMENTREC_Q_4

#create EU country of origin categories
dataset_male_empl_EU_0 <- subset(dataset_male_empl, EU == 0)
dataset_male_empl_EU_1 <- subset(dataset_male_empl, EU == 1)

#calculate mean, lower and upper 95% confidence intervals
#EU == 0
mean_EU_0 <- mean(dataset_male_empl_EU_0$INCOME)
sample.n_EU_0 <- length(dataset_male_empl_EU_0$INCOME)
sample.sd_EU_0 <- sd(dataset_male_empl_EU_0$INCOME)
sample.se_EU_0 <- sample.sd_EU_0/sqrt(sample.n_EU_0)
alpha = 0.05
degrees.freedom_EU_0 = sample.n_EU_0 - 1
t.score_EU_0 = qt(p=alpha/2, df=degrees.freedom_EU_0,lower.tail=F)
margin.error_EU_0 <- t.score_EU_0 * sample.se_EU_0
lower.bound_EU_0 <- mean_EU_0 - margin.error_EU_0
upper.bound_EU_0 <- mean_EU_0 + margin.error_EU_0

#EU == 1
mean_EU_1 <- mean(dataset_male_empl_EU_1$INCOME)
sample.n_EU_1 <- length(dataset_male_empl_EU_1$INCOME)
sample.sd_EU_1 <- sd(dataset_male_empl_EU_1$INCOME)
sample.se_EU_1 <- sample.sd_EU_1/sqrt(sample.n_EU_1)
alpha = 0.05
degrees.freedom_EU_1 = sample.n_EU_1 - 1
t.score_EU_1 = qt(p=alpha/2, df=degrees.freedom_EU_1,lower.tail=F)
margin.error_EU_1 <- t.score_EU_1 * sample.se_EU_1
lower.bound_EU_1 <- mean_EU_1 - margin.error_EU_1
upper.bound_EU_1 <- mean_EU_1 + margin.error_EU_1

#select female immigrants with employment
dataset_female_empl <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1)

#create years since naturalization categories
dataset_female_empl_ysn_cat1 <- subset(dataset_female_empl, YSN_CAT == 1)
dataset_female_empl_ysn_cat2 <- subset(dataset_female_empl, YSN_CAT == 2)
dataset_female_empl_ysn_cat3 <- subset(dataset_female_empl, YSN_CAT == 3)
dataset_female_empl_ysn_cat4 <- subset(dataset_female_empl, YSN_CAT == 4)
dataset_female_empl_ysn_cat5 <- subset(dataset_female_empl, YSN_CAT == 5)
dataset_female_empl_ysn_cat6 <- subset(dataset_female_empl, YSN_CAT == 6)
dataset_female_empl_ysn_cat7 <- subset(dataset_female_empl, YSN_CAT == 7)
dataset_female_empl_ysn_cat8 <- subset(dataset_female_empl, YSN_CAT == 8)
dataset_female_empl_ysn_cat9 <- subset(dataset_female_empl, YSN_CAT == 9)
dataset_female_empl_ysn_cat10 <- subset(dataset_female_empl, YSN_CAT == 10)

#calculate mean, lower and upper 95% confidence intervals
#YSN_cat == 1
mean_YSN_CAT_1 <- mean(dataset_female_empl_ysn_cat1$INCOME)
sample.n_YSN_CAT_1 <- length(dataset_female_empl_ysn_cat1$INCOME)
sample.sd_YSN_CAT_1 <- sd(dataset_female_empl_ysn_cat1$INCOME)
sample.se_YSN_CAT_1 <- sample.sd_YSN_CAT_1/sqrt(sample.n_YSN_CAT_1)
alpha = 0.05
degrees.freedom_YSN_CAT_1 = sample.n_YSN_CAT_1 - 1
t.score_YSN_CAT_1 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_1,lower.tail=F)
margin.error_YSN_CAT_1 <- t.score_YSN_CAT_1 * sample.se_YSN_CAT_1
lower.bound_YSN_CAT_1 <- mean_YSN_CAT_1 - margin.error_YSN_CAT_1
upper.bound_YSN_CAT_1 <- mean_YSN_CAT_1 + margin.error_YSN_CAT_1

#YSN_cat == 2
mean_YSN_CAT_2 <- mean(dataset_female_empl_ysn_cat2$INCOME)
sample.n_YSN_CAT_2 <- length(dataset_female_empl_ysn_cat2$INCOME)
sample.sd_YSN_CAT_2 <- sd(dataset_female_empl_ysn_cat2$INCOME)
sample.se_YSN_CAT_2 <- sample.sd_YSN_CAT_2/sqrt(sample.n_YSN_CAT_2)
alpha = 0.05
degrees.freedom_YSN_CAT_2 = sample.n_YSN_CAT_2 - 1
t.score_YSN_CAT_2 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_2,lower.tail=F)
margin.error_YSN_CAT_2 <- t.score_YSN_CAT_2 * sample.se_YSN_CAT_2
lower.bound_YSN_CAT_2 <- mean_YSN_CAT_2 - margin.error_YSN_CAT_2
upper.bound_YSN_CAT_2 <- mean_YSN_CAT_2 + margin.error_YSN_CAT_2

#YSN_cat == 3
mean_YSN_CAT_3 <- mean(dataset_female_empl_ysn_cat3$INCOME)
sample.n_YSN_CAT_3 <- length(dataset_female_empl_ysn_cat3$INCOME)
sample.sd_YSN_CAT_3 <- sd(dataset_female_empl_ysn_cat3$INCOME)
sample.se_YSN_CAT_3 <- sample.sd_YSN_CAT_3/sqrt(sample.n_YSN_CAT_3)
alpha = 0.05
degrees.freedom_YSN_CAT_3 = sample.n_YSN_CAT_3 - 1
t.score_YSN_CAT_3 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_3,lower.tail=F)
margin.error_YSN_CAT_3 <- t.score_YSN_CAT_3 * sample.se_YSN_CAT_3
lower.bound_YSN_CAT_3 <- mean_YSN_CAT_3 - margin.error_YSN_CAT_3
upper.bound_YSN_CAT_3 <- mean_YSN_CAT_3 + margin.error_YSN_CAT_3

#YSN_cat == 4
mean_YSN_CAT_4 <- mean(dataset_female_empl_ysn_cat4$INCOME)
sample.n_YSN_CAT_4 <- length(dataset_female_empl_ysn_cat4$INCOME)
sample.sd_YSN_CAT_4 <- sd(dataset_female_empl_ysn_cat4$INCOME)
sample.se_YSN_CAT_4 <- sample.sd_YSN_CAT_4/sqrt(sample.n_YSN_CAT_4)
alpha = 0.05
degrees.freedom_YSN_CAT_4 = sample.n_YSN_CAT_4 - 1
t.score_YSN_CAT_4 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_4,lower.tail=F)
margin.error_YSN_CAT_4 <- t.score_YSN_CAT_4 * sample.se_YSN_CAT_4
lower.bound_YSN_CAT_4 <- mean_YSN_CAT_4 - margin.error_YSN_CAT_4
upper.bound_YSN_CAT_4 <- mean_YSN_CAT_4 + margin.error_YSN_CAT_4

#YSN_cat == 5
mean_YSN_CAT_5 <- mean(dataset_female_empl_ysn_cat5$INCOME)
sample.n_YSN_CAT_5 <- length(dataset_female_empl_ysn_cat5$INCOME)
sample.sd_YSN_CAT_5 <- sd(dataset_female_empl_ysn_cat5$INCOME)
sample.se_YSN_CAT_5 <- sample.sd_YSN_CAT_5/sqrt(sample.n_YSN_CAT_5)
alpha = 0.05
degrees.freedom_YSN_CAT_5 = sample.n_YSN_CAT_5 - 1
t.score_YSN_CAT_5 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_5,lower.tail=F)
margin.error_YSN_CAT_5 <- t.score_YSN_CAT_5 * sample.se_YSN_CAT_5
lower.bound_YSN_CAT_5 <- mean_YSN_CAT_5 - margin.error_YSN_CAT_5
upper.bound_YSN_CAT_5 <- mean_YSN_CAT_5 + margin.error_YSN_CAT_5

#YSN_cat == 6
mean_YSN_CAT_6 <- mean(dataset_female_empl_ysn_cat6$INCOME)
sample.n_YSN_CAT_6 <- length(dataset_female_empl_ysn_cat6$INCOME)
sample.sd_YSN_CAT_6 <- sd(dataset_female_empl_ysn_cat6$INCOME)
sample.se_YSN_CAT_6 <- sample.sd_YSN_CAT_6/sqrt(sample.n_YSN_CAT_6)
alpha = 0.05
degrees.freedom_YSN_CAT_6 = sample.n_YSN_CAT_6 - 1
t.score_YSN_CAT_6 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_6,lower.tail=F)
margin.error_YSN_CAT_6 <- t.score_YSN_CAT_6 * sample.se_YSN_CAT_6
lower.bound_YSN_CAT_6 <- mean_YSN_CAT_6 - margin.error_YSN_CAT_6
upper.bound_YSN_CAT_6 <- mean_YSN_CAT_6 + margin.error_YSN_CAT_6

#YSN_cat == 7
mean_YSN_CAT_7 <- mean(dataset_female_empl_ysn_cat7$INCOME)
sample.n_YSN_CAT_7 <- length(dataset_female_empl_ysn_cat7$INCOME)
sample.sd_YSN_CAT_7 <- sd(dataset_female_empl_ysn_cat7$INCOME)
sample.se_YSN_CAT_7 <- sample.sd_YSN_CAT_7/sqrt(sample.n_YSN_CAT_7)
alpha = 0.05
degrees.freedom_YSN_CAT_7 = sample.n_YSN_CAT_7 - 1
t.score_YSN_CAT_7 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_7,lower.tail=F)
margin.error_YSN_CAT_7 <- t.score_YSN_CAT_7 * sample.se_YSN_CAT_7
lower.bound_YSN_CAT_7 <- mean_YSN_CAT_7 - margin.error_YSN_CAT_7
upper.bound_YSN_CAT_7 <- mean_YSN_CAT_7 + margin.error_YSN_CAT_7

#YSN_cat == 8
mean_YSN_CAT_8 <- mean(dataset_female_empl_ysn_cat8$INCOME)
sample.n_YSN_CAT_8 <- length(dataset_female_empl_ysn_cat8$INCOME)
sample.sd_YSN_CAT_8 <- sd(dataset_female_empl_ysn_cat8$INCOME)
sample.se_YSN_CAT_8 <- sample.sd_YSN_CAT_8/sqrt(sample.n_YSN_CAT_8)
alpha = 0.05
degrees.freedom_YSN_CAT_8 = sample.n_YSN_CAT_8 - 1
t.score_YSN_CAT_8 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_8,lower.tail=F)
margin.error_YSN_CAT_8 <- t.score_YSN_CAT_8 * sample.se_YSN_CAT_8
lower.bound_YSN_CAT_8 <- mean_YSN_CAT_8 - margin.error_YSN_CAT_8
upper.bound_YSN_CAT_8 <- mean_YSN_CAT_8 + margin.error_YSN_CAT_8

#YSN_cat == 9
mean_YSN_CAT_9 <- mean(dataset_female_empl_ysn_cat9$INCOME)
sample.n_YSN_CAT_9 <- length(dataset_female_empl_ysn_cat9$INCOME)
sample.sd_YSN_CAT_9 <- sd(dataset_female_empl_ysn_cat9$INCOME)
sample.se_YSN_CAT_9 <- sample.sd_YSN_CAT_9/sqrt(sample.n_YSN_CAT_9)
alpha = 0.05
degrees.freedom_YSN_CAT_9 = sample.n_YSN_CAT_9 - 1
t.score_YSN_CAT_9 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_9,lower.tail=F)
margin.error_YSN_CAT_9 <- t.score_YSN_CAT_9 * sample.se_YSN_CAT_9
lower.bound_YSN_CAT_9 <- mean_YSN_CAT_9 - margin.error_YSN_CAT_9
upper.bound_YSN_CAT_9 <- mean_YSN_CAT_9 + margin.error_YSN_CAT_9

#YSN_cat == 10
mean_YSN_CAT_10 <- mean(dataset_female_empl_ysn_cat10$INCOME)
sample.n_YSN_CAT_10 <- length(dataset_female_empl_ysn_cat10$INCOME)
sample.sd_YSN_CAT_10 <- sd(dataset_female_empl_ysn_cat10$INCOME)
sample.se_YSN_CAT_10 <- sample.sd_YSN_CAT_10/sqrt(sample.n_YSN_CAT_10)
alpha = 0.05
degrees.freedom_YSN_CAT_10 = sample.n_YSN_CAT_10 - 1
t.score_YSN_CAT_10 = qt(p=alpha/2, df=degrees.freedom_YSN_CAT_10,lower.tail=F)
margin.error_YSN_CAT_10 <- t.score_YSN_CAT_10 * sample.se_YSN_CAT_10
lower.bound_YSN_CAT_10 <- mean_YSN_CAT_10 - margin.error_YSN_CAT_10
upper.bound_YSN_CAT_10 <- mean_YSN_CAT_10 + margin.error_YSN_CAT_10

#create age at migration categories
dataset_female_empl_age_cat1 <- subset(dataset_female_empl, AGE_CAT == 1)
dataset_female_empl_age_cat2 <- subset(dataset_female_empl, AGE_CAT == 2)
dataset_female_empl_age_cat3 <- subset(dataset_female_empl, AGE_CAT == 3)
dataset_female_empl_age_cat4 <- subset(dataset_female_empl, AGE_CAT == 4)
dataset_female_empl_age_cat5 <- subset(dataset_female_empl, AGE_CAT == 5)
dataset_female_empl_age_cat6 <- subset(dataset_female_empl, AGE_CAT == 6)

#calculate mean, lower and upper 95% confidence intervals
#AGE_CAT == 1
mean_AGE_CAT_1 <- mean(dataset_female_empl_age_cat1$INCOME)
sample.n_AGE_CAT_1 <- length(dataset_female_empl_age_cat1$INCOME)
sample.sd_AGE_CAT_1 <- sd(dataset_female_empl_age_cat1$INCOME)
sample.se_AGE_CAT_1 <- sample.sd_AGE_CAT_1/sqrt(sample.n_AGE_CAT_1)
alpha = 0.05
degrees.freedom_AGE_CAT_1 = sample.n_AGE_CAT_1 - 1
t.score_AGE_CAT_1 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_1,lower.tail=F)
margin.error_AGE_CAT_1 <- t.score_AGE_CAT_1 * sample.se_AGE_CAT_1
lower.bound_AGE_CAT_1 <- mean_AGE_CAT_1 - margin.error_AGE_CAT_1
upper.bound_AGE_CAT_1 <- mean_AGE_CAT_1 + margin.error_AGE_CAT_1

#AGE_CAT == 2
mean_AGE_CAT_2 <- mean(dataset_female_empl_age_cat2$INCOME)
sample.n_AGE_CAT_2 <- length(dataset_female_empl_age_cat2$INCOME)
sample.sd_AGE_CAT_2 <- sd(dataset_female_empl_age_cat2$INCOME)
sample.se_AGE_CAT_2 <- sample.sd_AGE_CAT_2/sqrt(sample.n_AGE_CAT_2)
alpha = 0.05
degrees.freedom_AGE_CAT_2 = sample.n_AGE_CAT_2 - 1
t.score_AGE_CAT_2 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_2,lower.tail=F)
margin.error_AGE_CAT_2 <- t.score_AGE_CAT_2 * sample.se_AGE_CAT_2
lower.bound_AGE_CAT_2 <- mean_AGE_CAT_2 - margin.error_AGE_CAT_2
upper.bound_AGE_CAT_2 <- mean_AGE_CAT_2 + margin.error_AGE_CAT_2

#AGE_CAT == 3
mean_AGE_CAT_3 <- mean(dataset_female_empl_age_cat3$INCOME)
sample.n_AGE_CAT_3 <- length(dataset_female_empl_age_cat3$INCOME)
sample.sd_AGE_CAT_3 <- sd(dataset_female_empl_age_cat3$INCOME)
sample.se_AGE_CAT_3 <- sample.sd_AGE_CAT_3/sqrt(sample.n_AGE_CAT_3)
alpha = 0.05
degrees.freedom_AGE_CAT_3 = sample.n_AGE_CAT_3 - 1
t.score_AGE_CAT_3 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_3,lower.tail=F)
margin.error_AGE_CAT_3 <- t.score_AGE_CAT_3 * sample.se_AGE_CAT_3
lower.bound_AGE_CAT_3 <- mean_AGE_CAT_3 - margin.error_AGE_CAT_3
upper.bound_AGE_CAT_3 <- mean_AGE_CAT_3 + margin.error_AGE_CAT_3

#AGE_CAT == 4
mean_AGE_CAT_4 <- mean(dataset_female_empl_age_cat4$INCOME)
sample.n_AGE_CAT_4 <- length(dataset_female_empl_age_cat4$INCOME)
sample.sd_AGE_CAT_4 <- sd(dataset_female_empl_age_cat4$INCOME)
sample.se_AGE_CAT_4 <- sample.sd_AGE_CAT_4/sqrt(sample.n_AGE_CAT_4)
alpha = 0.05
degrees.freedom_AGE_CAT_4 = sample.n_AGE_CAT_4 - 1
t.score_AGE_CAT_4 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_4,lower.tail=F)
margin.error_AGE_CAT_4 <- t.score_AGE_CAT_4 * sample.se_AGE_CAT_4
lower.bound_AGE_CAT_4 <- mean_AGE_CAT_4 - margin.error_AGE_CAT_4
upper.bound_AGE_CAT_4 <- mean_AGE_CAT_4 + margin.error_AGE_CAT_4

#AGE_CAT == 5
mean_AGE_CAT_5 <- mean(dataset_female_empl_age_cat5$INCOME)
sample.n_AGE_CAT_5 <- length(dataset_female_empl_age_cat5$INCOME)
sample.sd_AGE_CAT_5 <- sd(dataset_female_empl_age_cat5$INCOME)
sample.se_AGE_CAT_5 <- sample.sd_AGE_CAT_5/sqrt(sample.n_AGE_CAT_5)
alpha = 0.05
degrees.freedom_AGE_CAT_5 = sample.n_AGE_CAT_5 - 1
t.score_AGE_CAT_5 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_5,lower.tail=F)
margin.error_AGE_CAT_5 <- t.score_AGE_CAT_5 * sample.se_AGE_CAT_5
lower.bound_AGE_CAT_5 <- mean_AGE_CAT_5 - margin.error_AGE_CAT_5
upper.bound_AGE_CAT_5 <- mean_AGE_CAT_5 + margin.error_AGE_CAT_5

#AGE_CAT == 6
mean_AGE_CAT_6 <- mean(dataset_female_empl_age_cat6$INCOME)
sample.n_AGE_CAT_6 <- length(dataset_female_empl_age_cat6$INCOME)
sample.sd_AGE_CAT_6 <- sd(dataset_female_empl_age_cat6$INCOME)
sample.se_AGE_CAT_6 <- sample.sd_AGE_CAT_6/sqrt(sample.n_AGE_CAT_6)
alpha = 0.05
degrees.freedom_AGE_CAT_6 = sample.n_AGE_CAT_6 - 1
t.score_AGE_CAT_6 = qt(p=alpha/2, df=degrees.freedom_AGE_CAT_6,lower.tail=F)
margin.error_AGE_CAT_6 <- t.score_AGE_CAT_6 * sample.se_AGE_CAT_6
lower.bound_AGE_CAT_6 <- mean_AGE_CAT_6 - margin.error_AGE_CAT_6
upper.bound_AGE_CAT_6 <- mean_AGE_CAT_6 + margin.error_AGE_CAT_6

#create years since migration categories
dataset_female_empl_ysm_cat1 <- subset(dataset_female_empl, YSM_CAT == 1)
dataset_female_empl_ysm_cat2 <- subset(dataset_female_empl, YSM_CAT == 2)
dataset_female_empl_ysm_cat3 <- subset(dataset_female_empl, YSM_CAT == 3)
dataset_female_empl_ysm_cat4 <- subset(dataset_female_empl, YSM_CAT == 4)
dataset_female_empl_ysm_cat4 <- subset(dataset_female_empl, YSM_CAT == 5)

#calculate mean, lower and upper 95% confidence intervals
#YSM_CAT == 1
mean_YSM_CAT_1 <- mean(dataset_female_empl_ysm_cat1$INCOME)
sample.n_YSM_CAT_1 <- length(dataset_female_empl_ysm_cat1$INCOME)
sample.sd_YSM_CAT_1 <- sd(dataset_female_empl_ysm_cat1$INCOME)
sample.se_YSM_CAT_1 <- sample.sd_YSM_CAT_1/sqrt(sample.n_YSM_CAT_1)
alpha = 0.05
degrees.freedom_YSM_CAT_1 = sample.n_YSM_CAT_1 - 1
t.score_YSM_CAT_1 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_1,lower.tail=F)
margin.error_YSM_CAT_1 <- t.score_YSM_CAT_1 * sample.se_YSM_CAT_1
lower.bound_YSM_CAT_1 <- mean_YSM_CAT_1 - margin.error_YSM_CAT_1
upper.bound_YSM_CAT_1 <- mean_YSM_CAT_1 + margin.error_YSM_CAT_1

#YSM_CAT == 2
mean_YSM_CAT_2 <- mean(dataset_female_empl_ysm_cat2$INCOME)
sample.n_YSM_CAT_2 <- length(dataset_female_empl_ysm_cat2$INCOME)
sample.sd_YSM_CAT_2 <- sd(dataset_female_empl_ysm_cat2$INCOME)
sample.se_YSM_CAT_2 <- sample.sd_YSM_CAT_2/sqrt(sample.n_YSM_CAT_2)
alpha = 0.05
degrees.freedom_YSM_CAT_2 = sample.n_YSM_CAT_2 - 1
t.score_YSM_CAT_2 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_2,lower.tail=F)
margin.error_YSM_CAT_2 <- t.score_YSM_CAT_2 * sample.se_YSM_CAT_2
lower.bound_YSM_CAT_2 <- mean_YSM_CAT_2 - margin.error_YSM_CAT_2
upper.bound_YSM_CAT_2 <- mean_YSM_CAT_2 + margin.error_YSM_CAT_2

#YSM_CAT == 3
mean_YSM_CAT_3 <- mean(dataset_female_empl_ysm_cat3$INCOME)
sample.n_YSM_CAT_3 <- length(dataset_female_empl_ysm_cat3$INCOME)
sample.sd_YSM_CAT_3 <- sd(dataset_female_empl_ysm_cat3$INCOME)
sample.se_YSM_CAT_3 <- sample.sd_YSM_CAT_3/sqrt(sample.n_YSM_CAT_3)
alpha = 0.05
degrees.freedom_YSM_CAT_3 = sample.n_YSM_CAT_3 - 1
t.score_YSM_CAT_3 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_3,lower.tail=F)
margin.error_YSM_CAT_3 <- t.score_YSM_CAT_3 * sample.se_YSM_CAT_3
lower.bound_YSM_CAT_3 <- mean_YSM_CAT_3 - margin.error_YSM_CAT_3
upper.bound_YSM_CAT_3 <- mean_YSM_CAT_3 + margin.error_YSM_CAT_3

#YSM_CAT == 4
mean_YSM_CAT_4 <- mean(dataset_female_empl_ysm_cat4$INCOME)
sample.n_YSM_CAT_4 <- length(dataset_female_empl_ysm_cat4$INCOME)
sample.sd_YSM_CAT_4 <- sd(dataset_female_empl_ysm_cat4$INCOME)
sample.se_YSM_CAT_4 <- sample.sd_YSM_CAT_4/sqrt(sample.n_YSM_CAT_4)
alpha = 0.05
degrees.freedom_YSM_CAT_4 = sample.n_YSM_CAT_4 - 1
t.score_YSM_CAT_4 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_4,lower.tail=F)
margin.error_YSM_CAT_4 <- t.score_YSM_CAT_4 * sample.se_YSM_CAT_4
lower.bound_YSM_CAT_4 <- mean_YSM_CAT_4 - margin.error_YSM_CAT_4
upper.bound_YSM_CAT_4 <- mean_YSM_CAT_4 + margin.error_YSM_CAT_4

#YSM_CAT == 5
mean_YSM_CAT_5 <- mean(dataset_female_empl_ysm_cat5$INCOME)
sample.n_YSM_CAT_5 <- length(dataset_female_empl_ysm_cat5$INCOME)
sample.sd_YSM_CAT_5 <- sd(dataset_female_empl_ysm_cat5$INCOME)
sample.se_YSM_CAT_5 <- sample.sd_YSM_CAT_5/sqrt(sample.n_YSM_CAT_5)
alpha = 0.05
degrees.freedom_YSM_CAT_5 = sample.n_YSM_CAT_5 - 1
t.score_YSM_CAT_5 = qt(p=alpha/2, df=degrees.freedom_YSM_CAT_5,lower.tail=F)
margin.error_YSM_CAT_5 <- t.score_YSM_CAT_5 * sample.se_YSM_CAT_5
lower.bound_YSM_CAT_5 <- mean_YSM_CAT_5 - margin.error_YSM_CAT_5
upper.bound_YSM_CAT_5 <- mean_YSM_CAT_5 + margin.error_YSM_CAT_5

#create partner categories
dataset_female_empl_partner1 <- subset(dataset_female_empl, PARTNER == 1)
dataset_female_empl_partner2 <- subset(dataset_female_empl, PARTNER == 2)
dataset_female_empl_partner3 <- subset(dataset_female_empl, PARTNER == 3)
dataset_female_empl_partner4 <- subset(dataset_female_empl, PARTNER == 4)

#calculate mean, lower and upper 95% confidence intervals
#PARTNER == 1
mean_PARTNER_1 <- mean(dataset_female_empl_partner1$INCOME)
sample.n_PARTNER_1 <- length(dataset_female_empl_partner1$INCOME)
sample.sd_PARTNER_1 <- sd(dataset_female_empl_partner1$INCOME)
sample.se_PARTNER_1 <- sample.sd_PARTNER_1/sqrt(sample.n_PARTNER_1)
alpha = 0.05
degrees.freedom_PARTNER_1 = sample.n_PARTNER_1 - 1
t.score_PARTNER_1 = qt(p=alpha/2, df=degrees.freedom_PARTNER_1,lower.tail=F)
margin.error_PARTNER_1 <- t.score_PARTNER_1 * sample.se_PARTNER_1
lower.bound_PARTNER_1 <- mean_PARTNER_1 - margin.error_PARTNER_1
upper.bound_PARTNER_1 <- mean_PARTNER_1 + margin.error_PARTNER_1

#PARTNER == 2
mean_PARTNER_2 <- mean(dataset_female_empl_partner2$INCOME)
sample.n_PARTNER_2 <- length(dataset_female_empl_partner2$INCOME)
sample.sd_PARTNER_2 <- sd(dataset_female_empl_partner2$INCOME)
sample.se_PARTNER_2 <- sample.sd_PARTNER_2/sqrt(sample.n_PARTNER_2)
alpha = 0.05
degrees.freedom_PARTNER_2 = sample.n_PARTNER_2 - 1
t.score_PARTNER_2 = qt(p=alpha/2, df=degrees.freedom_PARTNER_2,lower.tail=F)
margin.error_PARTNER_2 <- t.score_PARTNER_2 * sample.se_PARTNER_2
lower.bound_PARTNER_2 <- mean_PARTNER_2 - margin.error_PARTNER_2
upper.bound_PARTNER_2 <- mean_PARTNER_2 + margin.error_PARTNER_2

#PARTNER == 3
mean_PARTNER_3 <- mean(dataset_female_empl_partner3$INCOME)
sample.n_PARTNER_3 <- length(dataset_female_empl_partner3$INCOME)
sample.sd_PARTNER_3 <- sd(dataset_female_empl_partner3$INCOME)
sample.se_PARTNER_3 <- sample.sd_PARTNER_3/sqrt(sample.n_PARTNER_3)
alpha = 0.05
degrees.freedom_PARTNER_3 = sample.n_PARTNER_3 - 1
t.score_PARTNER_3 = qt(p=alpha/2, df=degrees.freedom_PARTNER_3,lower.tail=F)
margin.error_PARTNER_3 <- t.score_PARTNER_3 * sample.se_PARTNER_3
lower.bound_PARTNER_3 <- mean_PARTNER_3 - margin.error_PARTNER_3
upper.bound_PARTNER_3 <- mean_PARTNER_3 + margin.error_PARTNER_3

#PARTNER == 4
mean_PARTNER_4 <- mean(dataset_female_empl_partner4$INCOME)
sample.n_PARTNER_4 <- length(dataset_female_empl_partner4$INCOME)
sample.sd_PARTNER_4 <- sd(dataset_female_empl_partner4$INCOME)
sample.se_PARTNER_4 <- sample.sd_PARTNER_4/sqrt(sample.n_PARTNER_4)
alpha = 0.05
degrees.freedom_PARTNER_4 = sample.n_PARTNER_4 - 1
t.score_PARTNER_4 = qt(p=alpha/2, df=degrees.freedom_PARTNER_4,lower.tail=F)
margin.error_PARTNER_4 <- t.score_PARTNER_4 * sample.se_PARTNER_4
lower.bound_PARTNER_4 <- mean_PARTNER_4 - margin.error_PARTNER_4
upper.bound_PARTNER_4 <- mean_PARTNER_4 + margin.error_PARTNER_4

#create child categories
dataset_female_empl_child0 <- subset(dataset_female_empl, CHILD == 0)
dataset_female_empl_child1 <- subset(dataset_female_empl, CHILD == 1)

#calculate mean, lower and upper 95% confidence intervals
#CHILD == 0
mean_CHILD_0 <- mean(dataset_female_empl_child0$INCOME)
sample.n_CHILD_0 <- length(dataset_female_empl_child0$INCOME)
sample.sd_CHILD_0 <- sd(dataset_female_empl_child0$INCOME)
sample.se_CHILD_0 <- sample.sd_CHILD_0/sqrt(sample.n_CHILD_0)
alpha = 0.05
degrees.freedom_CHILD_0 = sample.n_CHILD_0 - 1
t.score_CHILD_0 = qt(p=alpha/2, df=degrees.freedom_CHILD_0,lower.tail=F)
margin.error_CHILD_0 <- t.score_CHILD_0 * sample.se_CHILD_0
lower.bound_CHILD_0 <- mean_CHILD_0 - margin.error_CHILD_0
upper.bound_CHILD_0 <- mean_CHILD_0 + margin.error_CHILD_0

#CHILD == 1
mean_CHILD_1 <- mean(dataset_female_empl_child1$INCOME)
sample.n_CHILD_1 <- length(dataset_female_empl_child1$INCOME)
sample.sd_CHILD_1 <- sd(dataset_female_empl_child1$INCOME)
sample.se_CHILD_1 <- sample.sd_CHILD_1/sqrt(sample.n_CHILD_1)
alpha = 0.05
degrees.freedom_CHILD_1 = sample.n_CHILD_1 - 1
t.score_CHILD_1 = qt(p=alpha/2, df=degrees.freedom_CHILD_1,lower.tail=F)
margin.error_CHILD_1 <- t.score_CHILD_1 * sample.se_CHILD_1
lower.bound_CHILD_1 <- mean_CHILD_1 - margin.error_CHILD_1
upper.bound_CHILD_1 <- mean_CHILD_1 + margin.error_CHILD_1

#create development country of origin categories
dataset_female_empl_developmentrec_Q_1 <- subset(dataset_female_empl, DEVELOPMENTREC_Q == 1)
dataset_female_empl_developmentrec_Q_2 <- subset(dataset_female_empl, DEVELOPMENTREC_Q == 2)
dataset_female_empl_developmentrec_Q_3 <- subset(dataset_female_empl, DEVELOPMENTREC_Q == 3)
dataset_female_empl_developmentrec_Q_4 <- subset(dataset_female_empl, DEVELOPMENTREC_Q == 4)

#calculate mean, lower and upper 95% confidence intervals
#DEVELOPMENTREC_Q == 1
mean_DEVELOPMENTREC_Q_1 <- mean(dataset_female_empl_developmentrec_Q_1$INCOME)
sample.n_DEVELOPMENTREC_Q_1 <- length(dataset_female_empl_developmentrec_Q_1$INCOME)
sample.sd_DEVELOPMENTREC_Q_1 <- sd(dataset_female_empl_developmentrec_Q_1$INCOME)
sample.se_DEVELOPMENTREC_Q_1 <- sample.sd_DEVELOPMENTREC_Q_1/sqrt(sample.n_DEVELOPMENTREC_Q_1)
alpha = 0.05
degrees.freedom_DEVELOPMENTREC_Q_1 = sample.n_DEVELOPMENTREC_Q_1 - 1
t.score_DEVELOPMENTREC_Q_1 = qt(p=alpha/2, df=degrees.freedom_DEVELOPMENTREC_Q_1,lower.tail=F)
margin.error_DEVELOPMENTREC_Q_1 <- t.score_DEVELOPMENTREC_Q_1 * sample.se_DEVELOPMENTREC_Q_1
lower.bound_DEVELOPMENTREC_Q_1 <- mean_DEVELOPMENTREC_Q_1 - margin.error_DEVELOPMENTREC_Q_1
upper.bound_DEVELOPMENTREC_Q_1 <- mean_DEVELOPMENTREC_Q_1 + margin.error_DEVELOPMENTREC_Q_1

#DEVELOPMENTREC_Q == 2
mean_DEVELOPMENTREC_Q_2 <- mean(dataset_female_empl_developmentrec_Q_2$INCOME)
sample.n_DEVELOPMENTREC_Q_2 <- length(dataset_female_empl_developmentrec_Q_2$INCOME)
sample.sd_DEVELOPMENTREC_Q_2 <- sd(dataset_female_empl_developmentrec_Q_2$INCOME)
sample.se_DEVELOPMENTREC_Q_2 <- sample.sd_DEVELOPMENTREC_Q_2/sqrt(sample.n_DEVELOPMENTREC_Q_2)
alpha = 0.05
degrees.freedom_DEVELOPMENTREC_Q_2 = sample.n_DEVELOPMENTREC_Q_2 - 1
t.score_DEVELOPMENTREC_Q_2 = qt(p=alpha/2, df=degrees.freedom_DEVELOPMENTREC_Q_2,lower.tail=F)
margin.error_DEVELOPMENTREC_Q_2 <- t.score_DEVELOPMENTREC_Q_2 * sample.se_DEVELOPMENTREC_Q_2
lower.bound_DEVELOPMENTREC_Q_2 <- mean_DEVELOPMENTREC_Q_2 - margin.error_DEVELOPMENTREC_Q_2
upper.bound_DEVELOPMENTREC_Q_2 <- mean_DEVELOPMENTREC_Q_2 + margin.error_DEVELOPMENTREC_Q_2

#DEVELOPMENTREC_Q == 3
mean_DEVELOPMENTREC_Q_3 <- mean(dataset_female_empl_developmentrec_Q_3$INCOME)
sample.n_DEVELOPMENTREC_Q_3 <- length(dataset_female_empl_developmentrec_Q_3$INCOME)
sample.sd_DEVELOPMENTREC_Q_3 <- sd(dataset_female_empl_developmentrec_Q_3$INCOME)
sample.se_DEVELOPMENTREC_Q_3 <- sample.sd_DEVELOPMENTREC_Q_3/sqrt(sample.n_DEVELOPMENTREC_Q_3)
alpha = 0.05
degrees.freedom_DEVELOPMENTREC_Q_3 = sample.n_DEVELOPMENTREC_Q_3 - 1
t.score_DEVELOPMENTREC_Q_3 = qt(p=alpha/2, df=degrees.freedom_DEVELOPMENTREC_Q_3,lower.tail=F)
margin.error_DEVELOPMENTREC_Q_3 <- t.score_DEVELOPMENTREC_Q_3 * sample.se_DEVELOPMENTREC_Q_3
lower.bound_DEVELOPMENTREC_Q_3 <- mean_DEVELOPMENTREC_Q_3 - margin.error_DEVELOPMENTREC_Q_3
upper.bound_DEVELOPMENTREC_Q_3 <- mean_DEVELOPMENTREC_Q_3 + margin.error_DEVELOPMENTREC_Q_3

#DEVELOPMENTREC_Q == 4
mean_DEVELOPMENTREC_Q_4 <- mean(dataset_female_empl_developmentrec_Q_4$INCOME)
sample.n_DEVELOPMENTREC_Q_4 <- length(dataset_female_empl_developmentrec_Q_4$INCOME)
sample.sd_DEVELOPMENTREC_Q_4 <- sd(dataset_female_empl_developmentrec_Q_4$INCOME)
sample.se_DEVELOPMENTREC_Q_4 <- sample.sd_DEVELOPMENTREC_Q_4/sqrt(sample.n_DEVELOPMENTREC_Q_4)
alpha = 0.05
degrees.freedom_DEVELOPMENTREC_Q_4 = sample.n_DEVELOPMENTREC_Q_4 - 1
t.score_DEVELOPMENTREC_Q_4 = qt(p=alpha/2, df=degrees.freedom_DEVELOPMENTREC_Q_4,lower.tail=F)
margin.error_DEVELOPMENTREC_Q_4 <- t.score_DEVELOPMENTREC_Q_4 * sample.se_DEVELOPMENTREC_Q_4
lower.bound_DEVELOPMENTREC_Q_4 <- mean_DEVELOPMENTREC_Q_4 - margin.error_DEVELOPMENTREC_Q_4
upper.bound_DEVELOPMENTREC_Q_4 <- mean_DEVELOPMENTREC_Q_4 + margin.error_DEVELOPMENTREC_Q_4


#create EU country of origin categories
dataset_female_empl_EU_0 <- subset(dataset_female_empl, EU == 0)
dataset_female_empl_EU_1 <- subset(dataset_female_empl, EU == 1)

#calculate mean, lower and upper 95% confidence intervals
#EU == 0
mean_EU_0 <- mean(dataset_female_empl_EU_0$INCOME)
sample.n_EU_0 <- length(dataset_female_empl_EU_0$INCOME)
sample.sd_EU_0 <- sd(dataset_female_empl_EU_0$INCOME)
sample.se_EU_0 <- sample.sd_EU_0/sqrt(sample.n_EU_0)
alpha = 0.05
degrees.freedom_EU_0 = sample.n_EU_0 - 1
t.score_EU_0 = qt(p=alpha/2, df=degrees.freedom_EU_0,lower.tail=F)
margin.error_EU_0 <- t.score_EU_0 * sample.se_EU_0
lower.bound_EU_0 <- mean_EU_0 - margin.error_EU_0
upper.bound_EU_0 <- mean_EU_0 + margin.error_EU_0

#EU == 1
mean_EU_1 <- mean(dataset_female_empl_EU_1$INCOME)
sample.n_EU_1 <- length(dataset_female_empl_EU_1$INCOME)
sample.sd_EU_1 <- sd(dataset_female_empl_EU_1$INCOME)
sample.se_EU_1 <- sample.sd_EU_1/sqrt(sample.n_EU_1)
alpha = 0.05
degrees.freedom_EU_1 = sample.n_EU_1 - 1
t.score_EU_1 = qt(p=alpha/2, df=degrees.freedom_EU_1,lower.tail=F)
margin.error_EU_1 <- t.score_EU_1 * sample.se_EU_1
lower.bound_EU_1 <- mean_EU_1 - margin.error_EU_1
upper.bound_EU_1 <- mean_EU_1 + margin.error_EU_1


###########     
# Table 9 #
###########

#descriptive statistics
table(dataset_main$GENDER)
prop.table(dataset_main$GENDER)
table(dataset_main$AGE_CAT)
prop.table(dataset_main$AGE_CAT)
table(dataset_main$YSM_CAT)
prop.table(dataset_main$YSM_CAT)
table(dataset_main$PARTNER)
prop.table(dataset_main$PARTNER)
table(dataset_main$CHILD)
prop.table(dataset_main$CHILD)
table(dataset_main$DEVELOPMENT_REC_Q)
prop.table(dataset_main$DEVELOPMENT_REC_Q)
table(dataset_main$EU)
prop.table(dataset_main$EU)


############     
# Table 10 #
############

#select male immigrants with employment from development quartiles
dataset_male_empl_low <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & DEVELOPMENT_REC <= 2)
dataset_male_empl_high <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & REGION >= 3)

#regression table 10: male immigrants with employment from low development countries
result_Table_10_male_low <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                data = dataset_male_empl_low, 
                                model = 'within', 
                                effect = 'individual', 
                                index = c("ID","YEAR"))

#regression table 10: male immigrants with employment from high development countries
result_Table_10_male_high <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                 RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                 data = dataset_male_empl_high, 
                                 model = 'within', 
                                 effect = 'individual', 
                                 index = c("ID","YEAR"))

#Select female immigrants with employment from development quartiles
dataset_female_empl_low <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & DEVELOPMENT_REC <= 2)
dataset_female_empl_high <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & REGION >= 3)

#regression table 10: female immigrants with employment from low development countries
result_Table_10_female_low <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                  RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                  data = dataset_female_empl_low, 
                                  model = 'within', 
                                  effect = 'individual', 
                                  index = c("ID","YEAR"))

#regression table 10: female immigrants with employment from high development countries
result_Table_10_female_high <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                   RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                   data = dataset_female_empl_high, 
                                   model = 'within', 
                                   effect = 'individual', 
                                   index = c("ID","YEAR"))


############     
# Table 11 #
############

#select male immigrants with employment who are not naturalised
dataset_male_empl_nonat <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & NATURALISED == 0)

#select male immigrants with employment who are  naturalised
dataset_male_empl_nat <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & NATURALISED == 1)

#descriptive statistics labor market sector
prop.table(dataset_male_empl_nonat$SECTOR)
prop.table(dataset_male_empl_nat$SECTOR)

#select female immigrants with employment who are not naturalised
dataset_male_empl_nonat <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & NATURALISED == 0)

#select female immigrants with employment who are  naturalised
dataset_male_empl_nat <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & NATURALISED == 1)

#descriptive statistics labor market sector
prop.table(dataset_female_empl_nonat$SECTOR)
prop.table(dataset_female_empl_nat$SECTOR)


############     
# Table 12 #
############

#select male immigrants with employment 
dataset_male_empl <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1)

#create labor market sector categories
dataset_male_empl_sector_cat1 <- subset(dataset_male_empl, SECTOR == 1)
dataset_male_empl_sector_cat2 <- subset(dataset_male_empl, SECTOR == 2)
dataset_male_empl_sector_cat3 <- subset(dataset_male_empl, SECTOR == 3)
dataset_male_empl_sector_cat4 <- subset(dataset_male_empl, SECTOR == 4)
dataset_male_empl_sector_cat5 <- subset(dataset_male_empl, SECTOR == 5)
dataset_male_empl_sector_cat6 <- subset(dataset_male_empl, SECTOR == 6)
dataset_male_empl_sector_cat7 <- subset(dataset_male_empl, SECTOR == 7)
dataset_male_empl_sector_cat8 <- subset(dataset_male_empl, SECTOR == 8)
dataset_male_empl_sector_cat9 <- subset(dataset_male_empl, SECTOR == 9)
dataset_male_empl_sector_cat10 <- subset(dataset_male_empl, SECTOR == 10)

#calculate mean, lower and upper 95% confidence intervals
#SECTOR == 1
mean_SECTOR_1 <- mean(dataset_male_empl_sector_cat1$INCOME)
sample.n_SECTOR_1 <- length(dataset_male_empl_sector_cat1$INCOME)
sample.sd_SECTOR_1 <- sd(dataset_male_empl_sector_cat1$INCOME)
sample.se_SECTOR_1 <- sample.sd_SECTOR_1/sqrt(sample.n_SECTOR_1)
alpha = 0.05
degrees.freedom_SECTOR_1 = sample.n_SECTOR_1 - 1
t.score_SECTOR_1 = qt(p=alpha/2, df=degrees.freedom_SECTOR_1,lower.tail=F)
margin.error_SECTOR_1 <- t.score_SECTOR_1 * sample.se_SECTOR_1
lower.bound_SECTOR_1 <- mean_SECTOR_1 - margin.error_SECTOR_1
upper.bound_SECTOR_1 <- mean_SECTOR_1 + margin.error_SECTOR_1

#SECTOR == 2
mean_sector_2 <- mean(dataset_male_empl_sector_cat2$INCOME)
sample.n_sector_2 <- length(dataset_male_empl_sector_cat2$INCOME)
sample.sd_sector_2 <- sd(dataset_male_empl_sector_cat2$INCOME)
sample.se_sector_2 <- sample.sd_sector_2/sqrt(sample.n_sector_2)
alpha = 0.05
degrees.freedom_sector_2 = sample.n_sector_2 - 1
t.score_sector_2 = qt(p=alpha/2, df=degrees.freedom_sector_2,lower.tail=F)
margin.error_sector_2 <- t.score_sector_2 * sample.se_sector_2
lower.bound_sector_2 <- mean_sector_2 - margin.error_sector_2
upper.bound_sector_2 <- mean_sector_2 + margin.error_sector_2

#SECTOR == 3
mean_sector_3 <- mean(dataset_male_empl_sector_cat3$INCOME)
sample.n_sector_3 <- length(dataset_male_empl_sector_cat3$INCOME)
sample.sd_sector_3 <- sd(dataset_male_empl_sector_cat3$INCOME)
sample.se_sector_3 <- sample.sd_sector_3/sqrt(sample.n_sector_3)
alpha = 0.05
degrees.freedom_sector_3 = sample.n_sector_3 - 1
t.score_sector_3 = qt(p=alpha/2, df=degrees.freedom_sector_3,lower.tail=F)
margin.error_sector_3 <- t.score_sector_3 * sample.se_sector_3
lower.bound_sector_3 <- mean_sector_3 - margin.error_sector_3
upper.bound_sector_3 <- mean_sector_3 + margin.error_sector_3

#SECTOR == 4
mean_sector_4 <- mean(dataset_male_empl_sector_cat4$INCOME)
sample.n_sector_4 <- length(dataset_male_empl_sector_cat4$INCOME)
sample.sd_sector_4 <- sd(dataset_male_empl_sector_cat4$INCOME)
sample.se_sector_4 <- sample.sd_sector_4/sqrt(sample.n_sector_4)
alpha = 0.05
degrees.freedom_sector_4 = sample.n_sector_4 - 1
t.score_sector_4 = qt(p=alpha/2, df=degrees.freedom_sector_4,lower.tail=F)
margin.error_sector_4 <- t.score_sector_4 * sample.se_sector_4
lower.bound_sector_4 <- mean_sector_4 - margin.error_sector_4
upper.bound_sector_4 <- mean_sector_4 + margin.error_sector_4

#SECTOR == 5
mean_sector_5 <- mean(dataset_male_empl_sector_cat5$INCOME)
sample.n_sector_5 <- length(dataset_male_empl_sector_cat5$INCOME)
sample.sd_sector_5 <- sd(dataset_male_empl_sector_cat5$INCOME)
sample.se_sector_5 <- sample.sd_sector_5/sqrt(sample.n_sector_5)
alpha = 0.05
degrees.freedom_sector_5 = sample.n_sector_5 - 1
t.score_sector_5 = qt(p=alpha/2, df=degrees.freedom_sector_5,lower.tail=F)
margin.error_sector_5 <- t.score_sector_5 * sample.se_sector_5
lower.bound_sector_5 <- mean_sector_5 - margin.error_sector_5
upper.bound_sector_5 <- mean_sector_5 + margin.error_sector_5

#SECTOR == 6
mean_sector_6 <- mean(dataset_male_empl_sector_cat6$INCOME)
sample.n_sector_6 <- length(dataset_male_empl_sector_cat6$INCOME)
sample.sd_sector_6 <- sd(dataset_male_empl_sector_cat6$INCOME)
sample.se_sector_6 <- sample.sd_sector_6/sqrt(sample.n_sector_6)
alpha = 0.05
degrees.freedom_sector_6 = sample.n_sector_6 - 1
t.score_sector_6 = qt(p=alpha/2, df=degrees.freedom_sector_6,lower.tail=F)
margin.error_sector_6 <- t.score_sector_6 * sample.se_sector_6
lower.bound_sector_6 <- mean_sector_6 - margin.error_sector_6
upper.bound_sector_6 <- mean_sector_6 + margin.error_sector_6

#SECTOR == 7
mean_sector_7 <- mean(dataset_male_empl_sector_cat7$INCOME)
sample.n_sector_7 <- length(dataset_male_empl_sector_cat7$INCOME)
sample.sd_sector_7 <- sd(dataset_male_empl_sector_cat7$INCOME)
sample.se_sector_7 <- sample.sd_sector_7/sqrt(sample.n_sector_7)
alpha = 0.05
degrees.freedom_sector_7 = sample.n_sector_7 - 1
t.score_sector_7 = qt(p=alpha/2, df=degrees.freedom_sector_7,lower.tail=F)
margin.error_sector_7 <- t.score_sector_7 * sample.se_sector_7
lower.bound_sector_7 <- mean_sector_7 - margin.error_sector_7
upper.bound_sector_7 <- mean_sector_7 + margin.error_sector_7

#SECTOR == 8
mean_sector_8 <- mean(dataset_male_empl_sector_cat8$INCOME)
sample.n_sector_8 <- length(dataset_male_empl_sector_cat8$INCOME)
sample.sd_sector_8 <- sd(dataset_male_empl_sector_cat8$INCOME)
sample.se_sector_8 <- sample.sd_sector_8/sqrt(sample.n_sector_8)
alpha = 0.05
degrees.freedom_sector_8 = sample.n_sector_8 - 1
t.score_sector_8 = qt(p=alpha/2, df=degrees.freedom_sector_8,lower.tail=F)
margin.error_sector_8 <- t.score_sector_8 * sample.se_sector_8
lower.bound_sector_8 <- mean_sector_8 - margin.error_sector_8
upper.bound_sector_8 <- mean_sector_8 + margin.error_sector_8

#SECTOR == 9
mean_sector_9 <- mean(dataset_male_empl_sector_cat9$INCOME)
sample.n_sector_9 <- length(dataset_male_empl_sector_cat9$INCOME)
sample.sd_sector_9 <- sd(dataset_male_empl_sector_cat9$INCOME)
sample.se_sector_9 <- sample.sd_sector_9/sqrt(sample.n_sector_9)
alpha = 0.05
degrees.freedom_sector_9 = sample.n_sector_9 - 1
t.score_sector_9 = qt(p=alpha/2, df=degrees.freedom_sector_9,lower.tail=F)
margin.error_sector_9 <- t.score_sector_9 * sample.se_sector_9
lower.bound_sector_9 <- mean_sector_9 - margin.error_sector_9
upper.bound_sector_9 <- mean_sector_9 + margin.error_sector_9

#SECTOR == 10
mean_sector_10 <- mean(dataset_male_empl_sector_cat10$INCOME)
sample.n_sector_10 <- length(dataset_male_empl_sector_cat10$INCOME)
sample.sd_sector_10 <- sd(dataset_male_empl_sector_cat10$INCOME)
sample.se_sector_10 <- sample.sd_sector_10/sqrt(sample.n_sector_10)
alpha = 0.05
degrees.freedom_sector_10 = sample.n_sector_10 - 1
t.score_sector_10 = qt(p=alpha/2, df=degrees.freedom_sector_10,lower.tail=F)
margin.error_sector_10 <- t.score_sector_10 * sample.se_sector_10
lower.bound_sector_10 <- mean_sector_10 - margin.error_sector_10
upper.bound_sector_10 <- mean_sector_10 + margin.error_sector_10

#select female immigrants with employment 
dataset_female_empl <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1)

#create labor market sector categories
dataset_female_empl_sector_cat1 <- subset(dataset_female_empl, SECTOR == 1)
dataset_female_empl_sector_cat2 <- subset(dataset_female_empl, SECTOR == 2)
dataset_female_empl_sector_cat3 <- subset(dataset_female_empl, SECTOR == 3)
dataset_female_empl_sector_cat4 <- subset(dataset_female_empl, SECTOR == 4)
dataset_female_empl_sector_cat5 <- subset(dataset_female_empl, SECTOR == 5)
dataset_female_empl_sector_cat6 <- subset(dataset_female_empl, SECTOR == 6)
dataset_female_empl_sector_cat7 <- subset(dataset_female_empl, SECTOR == 7)
dataset_female_empl_sector_cat8 <- subset(dataset_female_empl, SECTOR == 8)
dataset_female_empl_sector_cat9 <- subset(dataset_female_empl, SECTOR == 9)
dataset_female_empl_sector_cat10 <- subset(dataset_female_empl, SECTOR == 10)

#calculate mean, lower and upper 95% confidence intervals
#SECTOR == 1
mean_SECTOR_1 <- mean(dataset_female_empl_sector_cat1$INCOME)
sample.n_SECTOR_1 <- length(dataset_female_empl_sector_cat1$INCOME)
sample.sd_SECTOR_1 <- sd(dataset_female_empl_sector_cat1$INCOME)
sample.se_SECTOR_1 <- sample.sd_SECTOR_1/sqrt(sample.n_SECTOR_1)
alpha = 0.05
degrees.freedom_SECTOR_1 = sample.n_SECTOR_1 - 1
t.score_SECTOR_1 = qt(p=alpha/2, df=degrees.freedom_SECTOR_1,lower.tail=F)
margin.error_SECTOR_1 <- t.score_SECTOR_1 * sample.se_SECTOR_1
lower.bound_SECTOR_1 <- mean_SECTOR_1 - margin.error_SECTOR_1
upper.bound_SECTOR_1 <- mean_SECTOR_1 + margin.error_SECTOR_1

#SECTOR == 2
mean_sector_2 <- mean(dataset_female_empl_sector_cat2$INCOME)
sample.n_sector_2 <- length(dataset_female_empl_sector_cat2$INCOME)
sample.sd_sector_2 <- sd(dataset_female_empl_sector_cat2$INCOME)
sample.se_sector_2 <- sample.sd_sector_2/sqrt(sample.n_sector_2)
alpha = 0.05
degrees.freedom_sector_2 = sample.n_sector_2 - 1
t.score_sector_2 = qt(p=alpha/2, df=degrees.freedom_sector_2,lower.tail=F)
margin.error_sector_2 <- t.score_sector_2 * sample.se_sector_2
lower.bound_sector_2 <- mean_sector_2 - margin.error_sector_2
upper.bound_sector_2 <- mean_sector_2 + margin.error_sector_2

#SECTOR == 3
mean_sector_3 <- mean(dataset_female_empl_sector_cat3$INCOME)
sample.n_sector_3 <- length(dataset_female_empl_sector_cat3$INCOME)
sample.sd_sector_3 <- sd(dataset_female_empl_sector_cat3$INCOME)
sample.se_sector_3 <- sample.sd_sector_3/sqrt(sample.n_sector_3)
alpha = 0.05
degrees.freedom_sector_3 = sample.n_sector_3 - 1
t.score_sector_3 = qt(p=alpha/2, df=degrees.freedom_sector_3,lower.tail=F)
margin.error_sector_3 <- t.score_sector_3 * sample.se_sector_3
lower.bound_sector_3 <- mean_sector_3 - margin.error_sector_3
upper.bound_sector_3 <- mean_sector_3 + margin.error_sector_3

#SECTOR == 4
mean_sector_4 <- mean(dataset_female_empl_sector_cat4$INCOME)
sample.n_sector_4 <- length(dataset_female_empl_sector_cat4$INCOME)
sample.sd_sector_4 <- sd(dataset_female_empl_sector_cat4$INCOME)
sample.se_sector_4 <- sample.sd_sector_4/sqrt(sample.n_sector_4)
alpha = 0.05
degrees.freedom_sector_4 = sample.n_sector_4 - 1
t.score_sector_4 = qt(p=alpha/2, df=degrees.freedom_sector_4,lower.tail=F)
margin.error_sector_4 <- t.score_sector_4 * sample.se_sector_4
lower.bound_sector_4 <- mean_sector_4 - margin.error_sector_4
upper.bound_sector_4 <- mean_sector_4 + margin.error_sector_4

#SECTOR == 5
mean_sector_5 <- mean(dataset_female_empl_sector_cat5$INCOME)
sample.n_sector_5 <- length(dataset_female_empl_sector_cat5$INCOME)
sample.sd_sector_5 <- sd(dataset_female_empl_sector_cat5$INCOME)
sample.se_sector_5 <- sample.sd_sector_5/sqrt(sample.n_sector_5)
alpha = 0.05
degrees.freedom_sector_5 = sample.n_sector_5 - 1
t.score_sector_5 = qt(p=alpha/2, df=degrees.freedom_sector_5,lower.tail=F)
margin.error_sector_5 <- t.score_sector_5 * sample.se_sector_5
lower.bound_sector_5 <- mean_sector_5 - margin.error_sector_5
upper.bound_sector_5 <- mean_sector_5 + margin.error_sector_5

#SECTOR == 6
mean_sector_6 <- mean(dataset_female_empl_sector_cat6$INCOME)
sample.n_sector_6 <- length(dataset_female_empl_sector_cat6$INCOME)
sample.sd_sector_6 <- sd(dataset_female_empl_sector_cat6$INCOME)
sample.se_sector_6 <- sample.sd_sector_6/sqrt(sample.n_sector_6)
alpha = 0.05
degrees.freedom_sector_6 = sample.n_sector_6 - 1
t.score_sector_6 = qt(p=alpha/2, df=degrees.freedom_sector_6,lower.tail=F)
margin.error_sector_6 <- t.score_sector_6 * sample.se_sector_6
lower.bound_sector_6 <- mean_sector_6 - margin.error_sector_6
upper.bound_sector_6 <- mean_sector_6 + margin.error_sector_6

#SECTOR == 7
mean_sector_7 <- mean(dataset_female_empl_sector_cat7$INCOME)
sample.n_sector_7 <- length(dataset_female_empl_sector_cat7$INCOME)
sample.sd_sector_7 <- sd(dataset_female_empl_sector_cat7$INCOME)
sample.se_sector_7 <- sample.sd_sector_7/sqrt(sample.n_sector_7)
alpha = 0.05
degrees.freedom_sector_7 = sample.n_sector_7 - 1
t.score_sector_7 = qt(p=alpha/2, df=degrees.freedom_sector_7,lower.tail=F)
margin.error_sector_7 <- t.score_sector_7 * sample.se_sector_7
lower.bound_sector_7 <- mean_sector_7 - margin.error_sector_7
upper.bound_sector_7 <- mean_sector_7 + margin.error_sector_7

#SECTOR == 8
mean_sector_8 <- mean(dataset_female_empl_sector_cat8$INCOME)
sample.n_sector_8 <- length(dataset_female_empl_sector_cat8$INCOME)
sample.sd_sector_8 <- sd(dataset_female_empl_sector_cat8$INCOME)
sample.se_sector_8 <- sample.sd_sector_8/sqrt(sample.n_sector_8)
alpha = 0.05
degrees.freedom_sector_8 = sample.n_sector_8 - 1
t.score_sector_8 = qt(p=alpha/2, df=degrees.freedom_sector_8,lower.tail=F)
margin.error_sector_8 <- t.score_sector_8 * sample.se_sector_8
lower.bound_sector_8 <- mean_sector_8 - margin.error_sector_8
upper.bound_sector_8 <- mean_sector_8 + margin.error_sector_8

#SECTOR == 9
mean_sector_9 <- mean(dataset_female_empl_sector_cat9$INCOME)
sample.n_sector_9 <- length(dataset_female_empl_sector_cat9$INCOME)
sample.sd_sector_9 <- sd(dataset_female_empl_sector_cat9$INCOME)
sample.se_sector_9 <- sample.sd_sector_9/sqrt(sample.n_sector_9)
alpha = 0.05
degrees.freedom_sector_9 = sample.n_sector_9 - 1
t.score_sector_9 = qt(p=alpha/2, df=degrees.freedom_sector_9,lower.tail=F)
margin.error_sector_9 <- t.score_sector_9 * sample.se_sector_9
lower.bound_sector_9 <- mean_sector_9 - margin.error_sector_9
upper.bound_sector_9 <- mean_sector_9 + margin.error_sector_9

#SECTOR == 10
mean_sector_10 <- mean(dataset_female_empl_sector_cat10$INCOME)
sample.n_sector_10 <- length(dataset_female_empl_sector_cat10$INCOME)
sample.sd_sector_10 <- sd(dataset_female_empl_sector_cat10$INCOME)
sample.se_sector_10 <- sample.sd_sector_10/sqrt(sample.n_sector_10)
alpha = 0.05
degrees.freedom_sector_10 = sample.n_sector_10 - 1
t.score_sector_10 = qt(p=alpha/2, df=degrees.freedom_sector_10,lower.tail=F)
margin.error_sector_10 <- t.score_sector_10 * sample.se_sector_10
lower.bound_sector_10 <- mean_sector_10 - margin.error_sector_10
upper.bound_sector_10 <- mean_sector_10 + margin.error_sector_10


############     
# Table 13 #
############

#select male immigrants with employment without right-truncation
dataset_male_empl_notrunc <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & TRUNCATION == 0)

#regression table 13: male immigrants with employment without right-truncation
result_Table_13_model_1_male <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                    RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                    data = dataset_male_empl_notrunc, 
                                    model = 'within', 
                                    effect = 'individual', 
                                    index = c("ID","YEAR"))

#2elect female immigrants with employment without right-truncation
dataset_female_empl_notrunc <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & TRUNCATION == 0)

#regression table 13: female immigrants with employment without right-truncation
result_Table_13_model_1_female <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                      RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                      data = dataset_female_empl_notrunc, 
                                      model = 'within', 
                                      effect = 'individual', 
                                      index = c("ID","YEAR"))

#select male immigrants without right-truncation
dataset_male_notrunc <- subset(dataset_main, GENDER == 1 & TRUNCATION == 0)

#regression table 13: male immigrants without right-truncation
result_Table_13_model_2_male <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                    RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                    data = dataset_male_notrunc, 
                                    model = 'within', 
                                    effect = 'individual', 
                                    index = c("ID","YEAR"))

#select female immigrants without right-truncation
dataset_female_notrunc <- subset(dataset_main, GENDER == 2 & TRUNCATION == 0)

#regression table 13: female immigrants without right-truncation
result_Table_13_model_2_female <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                      RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                      data = dataset_female_notrunc, 
                                      model = 'within', 
                                      effect = 'individual', 
                                      index = c("ID","YEAR"))


############     
# Table 14 #
############

#select male immigrants with employment and available information on education
dataset_male_empl_edu <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & EDUCATION >= 1)

#regression table 14: male immigrants with employment and available information on education
result_Table_14_male <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                            RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                            data = dataset_male_empl_edu, 
                            model = 'within', 
                            effect = 'individual', 
                            index = c("ID","YEAR"))

#regression table 14: male immigrants with employment and available information on education including education control
result_Table_14_male_edu <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                RESIDENCE + as.factor(PARTNER) + as.factor(CHILD) + as.factor(EDUCATION), 
                                data = dataset_male_empl_edu, 
                                model = 'within', 
                                effect = 'individual', 
                                index = c("ID","YEAR"))

#select female immigrants with employment and available information on education
dataset_female_empl_edu <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & EDUCATION >= 1)

#regression table 14: female immigrants with employment and available information on education
result_Table_14_female <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_female_empl_edu, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))

#regression table 14: female immigrants with employment and available information on education including education control
result_Table_14_female_edu <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                  RESIDENCE + as.factor(PARTNER) + as.factor(CHILD) + as.factor(EDUCATION), 
                                  data = dataset_female_empl_edu, 
                                  model = 'within', 
                                  effect = 'individual', 
                                  index = c("ID","YEAR"))


############     
# Table 15 #
############

#select male immigrants with employment and cohort 1996-1997
dataset_male_empl_early <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & IMMIGRATIONYEAR >= 1996 & IMMIGRATIONYEAR <= 1997)

#select male immigrants with employment and cohort 2001-2002
dataset_male_empl_late <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & IMMIGRATIONYEAR >= 2001 & IMMIGRATIONYEAR <= 2002)

#regression table 15: male immigrants with employment and from early cohorts
result_Table_15_male_early <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_male_empl_early, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))

#regression table 15: male immigrants with employment and from late cohorts
result_Table_15_male_late <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                 RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                 data = dataset_male_empl_late, 
                                 model = 'within', 
                                 effect = 'individual', 
                                 index = c("ID","YEAR"))

#select female immigrants with employment and cohort 1996-1997
dataset_female_empl_early <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & IMMIGRATIONYEAR >= 1996 & IMMIGRATIONYEAR <= 1997)

#select female immigrants with employment and cohort 2001-2002
dataset_female_empl_late <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & IMMIGRATIONYEAR >= 2001 & IMMIGRATIONYEAR <= 2002)

#regression table 15: female immigrants with employment and from early cohorts
result_Table_15_female_early <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                    RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                    data = dataset_female_empl_early, 
                                    model = 'within', 
                                    effect = 'individual', 
                                    index = c("ID","YEAR"))
  
#regression table 15: female immigrants with employment and from late cohorts
result_Table_15_female_late <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                   RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                                   data = dataset_female_empl_late, 
                                   model = 'within', 
                                   effect = 'individual', 
                                   index = c("ID","YEAR"))


############     
# Table 16 #
############

#select male immigrants with employment and with information on distance
dataset_male_empl_distance <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & DISTANCE >= 0)

#regression table 16: male immigrants with employment and available information on distance
result_Table_16_male <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                            RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                            data = dataset_male_empl_distance, 
                            model = 'within', 
                            effect = 'individual', 
                            index = c("ID","YEAR"))

#regression table 16: male immigrants with employment and available information on education including distance interaction
result_Table_16_male_distance <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                     RESIDENCE + as.factor(PARTNER) + as.factor(CHILD) +
                                     YSM:DISTANCE, 
                                     data = dataset_male_empl_distance, 
                                     model = 'within', 
                                     effect = 'individual', 
                                     index = c("ID","YEAR"))

#select female immigrants with employment and with information on distance
dataset_female_empl_distance <- subset(dataset_main, GENDER == 2 & EMPLOYMENT == 1 & DISTANCE >= 0)

#regression table 16: female immigrants with employment and available information on distance
result_Table_16_female <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_female_empl_distance, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))

#regression table 16: female immigrants with employment and available information on education including distance interaction
result_Table_16_female_distance <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                       RESIDENCE + as.factor(PARTNER) + as.factor(CHILD) +
                                       YSM:DISTANCE, 
                                       data = dataset_female_empl_distance, 
                                       model = 'within', 
                                       effect = 'individual', 
                                       index = c("ID","YEAR"))

#select male immigrants with employment and with information on EU
dataset_male_empl_eu <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & EU >= 0)

#regression table 16: male immigrants with employment and available information on EU
result_Table_16_male <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                            RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                            data = dataset_male_empl_eu, 
                            model = 'within', 
                            effect = 'individual', 
                            index = c("ID","YEAR"))
 
#regression table 16: male immigrants with employment and available information on EU including EU interaction
result_Table_16_male_EU <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                               RESIDENCE + as.factor(PARTNER) + as.factor(CHILD) +
                               YSM:EU, 
                               data = dataset_male_empl_eu, 
                               model = 'within', 
                               effect = 'individual', 
                               index = c("ID","YEAR"))

#select female immigrants with employment and with information on EU
dataset_female_empl_eu <- subset(dataset_main, GENDER == 1 & EMPLOYMENT == 1 & EU >= 0)

#regression table 16: female immigrants with employment and available information on EU
result_Table_16_female <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                              RESIDENCE + as.factor(PARTNER) + as.factor(CHILD), 
                              data = dataset_female_empl_eu, 
                              model = 'within', 
                              effect = 'individual', 
                              index = c("ID","YEAR"))

#regression table 16: female immigrants with employment and available information on EU including EU interaction
result_Table_16_female_EU <- plm(INCOME ~ as.factor(NATURALISED) + YSM_NATURALISATION + YSN_NATURALISED + 
                                 RESIDENCE + as.factor(PARTNER) + as.factor(CHILD) +
                                 YSM:EU, 
                                 data = dataset_female_empl_eu, 
                                 model = 'within', 
                                 effect = 'individual', 
                                 index = c("ID","YEAR"))
